parser/htmlparser/src/nsViewSourceHTML.cpp

   1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim: set sw=2 ts=2 et tw=78: */
   3 /* ***** BEGIN LICENSE BLOCK *****
   4  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
   5  *
   6  * The contents of this file are subject to the Mozilla Public License Version
   7  * 1.1 (the "License"); you may not use this file except in compliance with
   8  * the License. You may obtain a copy of the License at
   9  * http://www.mozilla.org/MPL/
  10  *
  11  * Software distributed under the License is distributed on an "AS IS" basis,
  12  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  13  * for the specific language governing rights and limitations under the
  14  * License.
  15  *
  16  * The Original Code is mozilla.org code.
  17  *
  18  * The Initial Developer of the Original Code is
  19  * Netscape Communications Corporation.
  20  * Portions created by the Initial Developer are Copyright (C) 1998
  21  * the Initial Developer. All Rights Reserved.
  22  *
  23  * Contributor(s):
  24  *   jce2@po.cwru.edu <Jason Eager>: Added pref to turn on/off
  25  *   Boris Zbarsky <bzbarsky@mit.edu>
  26  *   rbs@maths.uq.edu.au
  27  *   Andreas M. Schneider <clarence@clarence.de>
  28  *
  29  * Alternatively, the contents of this file may be used under the terms of
  30  * either of the GNU General Public License Version 2 or later (the "GPL"),
  31  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  32  * in which case the provisions of the GPL or the LGPL are applicable instead
  33  * of those above. If you wish to allow use of your version of this file only
  34  * under the terms of either the GPL or the LGPL, and not to allow others to
  35  * use your version of this file under the terms of the MPL, indicate your
  36  * decision by deleting the provisions above and replace them with the notice
  37  * and other provisions required by the GPL or the LGPL. If you do not delete
  38  * the provisions above, a recipient may use your version of this file under
  39  * the terms of any one of the MPL, the GPL or the LGPL.
  40  *
  41  * ***** END LICENSE BLOCK ***** */
  42
  43 /*
  44  * Set NS_VIEWSOURCE_TOKENS_PER_BLOCK to 0 to disable multi-block
  45  * output.  Multi-block output helps reduce the amount of bidi
  46  * processing we have to do on the resulting content model.
  47  */
  48 #define NS_VIEWSOURCE_TOKENS_PER_BLOCK 16
  49
  50 #ifdef RAPTOR_PERF_METRICS
  51 #  define START_TIMER()                    \
  52     if(mParser) mParser->mParseTime.Start(PR_FALSE); \
  53     if(mParser) mParser->mDTDTime.Start(PR_FALSE);
  54
  55 #  define STOP_TIMER()                     \
  56     if(mParser) mParser->mParseTime.Stop(); \
  57     if(mParser) mParser->mDTDTime.Stop();
  58
  59 #else
  60 #  define STOP_TIMER()
  61 #  define START_TIMER()
  62 #endif
  63
  64 #include "nsIAtom.h"
  65 #include "nsViewSourceHTML.h"
  66 #include "nsCRT.h"
  67 #include "nsParser.h"
  68 #include "nsScanner.h"
  69 #include "nsIParser.h"
  70 #include "nsDTDUtils.h"
  71 #include "nsIContentSink.h"
  72 #include "nsIHTMLContentSink.h"
  73 #include "nsHTMLTokenizer.h"
  74 #include "nsIPrefService.h"
  75 #include "nsIPrefBranch.h"
  76 #include "nsUnicharUtils.h"
  77 #include "nsPrintfCString.h"
  78 #include "nsNetUtil.h"
  79
  80 #include "nsIServiceManager.h"
  81
  82 #include "nsElementTable.h"
  83
  84 #include "prenv.h"  //this is here for debug reasons...
  85 #include "prtypes.h"  //this is here for debug reasons...
  86 #include "prio.h"
  87 #include "plstr.h"
  88 #include "prmem.h"
  89
  90 #ifdef RAPTOR_PERF_METRICS
  91 #include "stopwatch.h"
  92 Stopwatch vsTimer;
  93 #endif
  94
  95
  96
// Define this to dump the viewsource stuff to a file
//#define DUMP_TO_FILE
#ifdef DUMP_TO_FILE
#include <stdio.h>
  // Output stream for the debug dump; opened in the CViewSourceHTML
  // constructor and closed in DidBuildModel().  Null when not open.
  FILE* gDumpFile=0;
  // Path of the debug dump file (Unix-style temp location).
  static const char* gDumpFileName = "/tmp/viewsource.html";
//  static const char* gDumpFileName = "\\temp\\viewsource.html";
#endif // DUMP_TO_FILE
 105
// bug 22022 - these are used to toggle 'Wrap Long Lines' on the viewsource
// window by selectively setting/unsetting the following class defined in
// viewsource.css; the setting is remembered between invocations using a pref.
// kBodyId is the id attribute given to the generated <body>;
// kBodyClassWrap is the class added to it when mWrapLongLines is set.
static const char kBodyId[] = "viewsource";
static const char kBodyClassWrap[] = "wrap";
 111
// XPCOM nsISupports boilerplate: CViewSourceHTML exposes the nsIDTD interface.
NS_IMPL_ISUPPORTS1(CViewSourceHTML, nsIDTD)
 113
 114 /********************************************
 115  ********************************************/
 116
 117 enum {
 118   kStartTag = 0,
 119   kEndTag,
 120   kComment,
 121   kCData,
 122   kDoctype,
 123   kPI,
 124   kEntity,
 125   kText,
 126   kAttributeName,
 127   kAttributeValue,
 128   kMarkupDecl
 129 };
 130
// CSS class names put on the highlighting <span> that WriteTag() wraps
// around each fragment; indexed by the tag-kind enum above.
static const char* const kElementClasses[] = {
  "start-tag",          // kStartTag
  "end-tag",            // kEndTag
  "comment",            // kComment
  "cdata",              // kCData
  "doctype",            // kDoctype
  "pi",                 // kPI
  "entity",             // kEntity
  "text",               // kText
  "attribute-name",     // kAttributeName
  "attribute-value",    // kAttributeValue
  "markupdeclaration"   // kMarkupDecl
};
 144
// Literal text WriteTag() emits before a fragment's own text; indexed by
// the tag-kind enum above.  An empty string means nothing is emitted.
static const char* const kBeforeText[] = {
  "<",    // kStartTag
  "</",   // kEndTag
  "",     // kComment
  "",     // kCData
  "",     // kDoctype
  "",     // kPI
  "&",    // kEntity
  "",     // kText
  "",     // kAttributeName
  "=",    // kAttributeValue
  ""      // kMarkupDecl
};
 158
// Literal text WriteTag() emits after a fragment (and its attributes);
// indexed by the tag-kind enum above.  Skipped when the tag is in error.
static const char* const kAfterText[] = {
  ">",    // kStartTag
  ">",    // kEndTag
  "",     // kComment
  "",     // kCData
  "",     // kDoctype
  "",     // kPI
  "",     // kEntity
  "",     // kText
  "",     // kAttributeName
  "",     // kAttributeValue
  ""      // kMarkupDecl
};
 172
#ifdef DUMP_TO_FILE
// HTML-escaped counterpart of kBeforeText, written to gDumpFile by
// WriteTag(); indexed by the tag-kind enum above.
static const char* const kDumpFileBeforeText[] = {
  "&lt;",     // kStartTag
  "&lt;/",    // kEndTag
  "",         // kComment
  "",         // kCData
  "",         // kDoctype
  "",         // kPI
  "&amp;",    // kEntity
  "",         // kText
  "",         // kAttributeName
  "=",        // kAttributeValue
  ""          // kMarkupDecl
};

// HTML-escaped counterpart of kAfterText, written to gDumpFile by
// WriteTag(); indexed by the tag-kind enum above.
static const char* const kDumpFileAfterText[] = {
  "&gt;",     // kStartTag
  "&gt;",     // kEndTag
  "",         // kComment
  "",         // kCData
  "",         // kDoctype
  "",         // kPI
  "",         // kEntity
  "",         // kText
  "",         // kAttributeName
  "",         // kAttributeValue
  ""          // kMarkupDecl
};
#endif // DUMP_TO_FILE
 202
 203 /**
 204  *  Default constructor
 205  *
 206  *  @update  gess 4/9/98
 207  *  @param
 208  *  @return
 209  */
 210 CViewSourceHTML::CViewSourceHTML()
 211 {
 212   mSyntaxHighlight = PR_FALSE;
 213   mWrapLongLines = PR_FALSE;
 214   nsCOMPtr<nsIPrefBranch> prefBranch(do_GetService(NS_PREFSERVICE_CONTRACTID));
 215   if (prefBranch) {
 216     PRBool temp;
 217     nsresult rv;
 218     rv = prefBranch->GetBoolPref("view_source.syntax_highlight", &temp);
 219     mSyntaxHighlight = NS_SUCCEEDED(rv) ? temp : PR_TRUE;
 220
 221     rv = prefBranch->GetBoolPref("view_source.wrap_long_lines", &temp);
 222     mWrapLongLines = NS_SUCCEEDED(rv) ? temp : PR_FALSE;
 223   }
 224
 225   mParser = 0;
 226   mSink = 0;
 227   mLineNumber = 1;
 228   mTokenizer = 0;
 229   mDocType=eHTML_Quirks;
 230   mHasOpenRoot=PR_FALSE;
 231   mHasOpenBody=PR_FALSE;
 232
 233   mTokenCount=0;
 234
 235 #ifdef DUMP_TO_FILE
 236   gDumpFile = fopen(gDumpFileName,"w");
 237 #endif // DUMP_TO_FILE
 238
 239 }
 240
 241
 242
 243 /**
 244  *  Default destructor
 245  *
 246  *  @update  gess 4/9/98
 247  *  @param
 248  *  @return
 249  */
 250 CViewSourceHTML::~CViewSourceHTML(){
 251   mParser=0; //just to prove we destructed...
 252 }
 253
 254 /**
 255   * The parser uses a code sandwich to wrap the parsing process. Before
 256   * the process begins, WillBuildModel() is called. Afterwards the parser
 257   * calls DidBuildModel().
 258   * @update rickg 03.20.2000
 259   * @param  aParserContext
 260   * @param  aSink
 261   * @return error code (almost always 0)
 262   */
 263 nsresult CViewSourceHTML::WillBuildModel(const CParserContext& aParserContext,
 264                                          nsITokenizer* aTokenizer,
 265                                          nsIContentSink* aSink){
 266
 267   nsresult result=NS_OK;
 268
 269 #ifdef RAPTOR_PERF_METRICS
 270   vsTimer.Reset();
 271   NS_START_STOPWATCH(vsTimer);
 272 #endif
 273
 274   STOP_TIMER();
 275   mSink=(nsIHTMLContentSink*)aSink;
 276
 277   if((!aParserContext.mPrevContext) && (mSink)) {
 278
 279     nsAString & contextFilename = aParserContext.mScanner->GetFilename();
 280     mFilename = Substring(contextFilename,
 281                           12, // The length of "view-source:"
 282                           contextFilename.Length() - 12);
 283
 284     mDocType=aParserContext.mDocType;
 285     mMimeType=aParserContext.mMimeType;
 286     mDTDMode=aParserContext.mDTDMode;
 287     mParserCommand=aParserContext.mParserCommand;
 288     mTokenizer = aTokenizer;
 289
 290 #ifdef DUMP_TO_FILE
 291     if (gDumpFile) {
 292
 293       fprintf(gDumpFile, "<html>\n");
 294       fprintf(gDumpFile, "<head>\n");
 295       fprintf(gDumpFile, "<title>");
 296       fprintf(gDumpFile, "Source of: ");
 297       fputs(NS_ConvertUTF16toUTF8(mFilename).get(), gDumpFile);
 298       fprintf(gDumpFile, "</title>\n");
 299       fprintf(gDumpFile, "<link rel=\"stylesheet\" type=\"text/css\" href=\"resource://gre/res/viewsource.css\">\n");
 300       fprintf(gDumpFile, "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">\n");
 301       fprintf(gDumpFile, "</head>\n");
 302       fprintf(gDumpFile, "<body id=\"viewsource\">\n");
 303       fprintf(gDumpFile, "<pre id=\"line1\">\n");
 304     }
 305 #endif //DUMP_TO_FILE
 306   }
 307
 308
 309   if(eViewSource!=aParserContext.mParserCommand)
 310     mDocType=ePlainText;
 311   else mDocType=aParserContext.mDocType;
 312
 313   mLineNumber = 1;
 314   // Munge the DTD mode so that the document will be in standards mode even if
 315   // the original source was quirks.  The CONST_CAST is evil, but the other
 316   // options seem to be:
 317   // 1) Change the WillBuildModel signature to take an nsIParser so that we can
 318   //    push a new parser context right here.
 319   // 2) Make some assumptions about the exact class of mSink and get at the
 320   //    document that way.
 321   // #1 doesn't seem worth it, and #2 is even more evil, since we plan to reset
 322   // the DTD mode right back to what it was before, let's risk this.
 323   CParserContext& parserContext = const_cast<CParserContext&>(aParserContext);
 324   parserContext.mDTDMode = eDTDMode_full_standards;
 325   result = mSink->WillBuildModel();
 326   // And reset the DTD mode back to the right one
 327   parserContext.mDTDMode = mDTDMode;
 328   START_TIMER();
 329   return result;
 330 }
 331
 332 /**
 333   * The parser uses a code sandwich to wrap the parsing process. Before
 334   * the process begins, WillBuildModel() is called. Afterwards the parser
 335   * calls DidBuildModel().
 336   * @update gess5/18/98
 337   * @param  aFilename is the name of the file being parsed.
 338   * @return error code (almost always 0)
 339   */
 340 NS_IMETHODIMP CViewSourceHTML::BuildModel(nsIParser* aParser,nsITokenizer* aTokenizer,nsITokenObserver* anObserver,nsIContentSink* aSink) {
 341   nsresult result=NS_OK;
 342
 343   if(aTokenizer && aParser) {
 344
 345     nsITokenizer*  oldTokenizer=mTokenizer;
 346     mTokenizer=aTokenizer;
 347     nsTokenAllocator* theAllocator=mTokenizer->GetTokenAllocator();
 348
 349     if(!mHasOpenRoot) {
 350       // For the stack-allocated tokens below, it's safe to pass a null
 351       // token allocator, because there are no attributes on the tokens.
 352       CStartToken htmlToken(NS_LITERAL_STRING("HTML"), eHTMLTag_html);
 353       nsCParserNode htmlNode(&htmlToken, 0/*stack token*/);
 354       mSink->OpenContainer(htmlNode);
 355
 356       CStartToken headToken(NS_LITERAL_STRING("HEAD"), eHTMLTag_head);
 357       nsCParserNode headNode(&headToken, 0/*stack token*/);
 358       mSink->OpenContainer(headNode);
 359
 360       CStartToken titleToken(NS_LITERAL_STRING("TITLE"), eHTMLTag_title);
 361       nsCParserNode titleNode(&titleToken, 0/*stack token*/);
 362       mSink->OpenContainer(titleNode);
 363
 364       // Note that XUL will automatically add the prefix "Source of: "
 365       if (StringBeginsWith(mFilename, NS_LITERAL_STRING("data:")) &&
 366           mFilename.Length() > 50) {
 367         nsAutoString dataFilename(Substring(mFilename, 0, 50));
 368         dataFilename.AppendLiteral("...");
 369         CTextToken titleText(dataFilename);
 370         nsCParserNode titleTextNode(&titleText, 0/*stack token*/);
 371         mSink->AddLeaf(titleTextNode);
 372       } else {
 373         CTextToken titleText(mFilename);
 374         nsCParserNode titleTextNode(&titleText, 0/*stack token*/);
 375         mSink->AddLeaf(titleTextNode);
 376       }
 377
 378       mSink->CloseContainer(eHTMLTag_title);
 379
 380       if (theAllocator) {
 381         CStartToken* theToken=
 382           static_cast<CStartToken*>
 383                      (theAllocator->CreateTokenOfType(eToken_start,
 384                                                          eHTMLTag_link,
 385                                                          NS_LITERAL_STRING("LINK")));
 386         if (theToken) {
 387           nsCParserStartNode theNode(theToken, theAllocator);
 388
 389           AddAttrToNode(theNode, theAllocator,
 390                         NS_LITERAL_STRING("rel"),
 391                         NS_LITERAL_STRING("stylesheet"));
 392
 393           AddAttrToNode(theNode, theAllocator,
 394                         NS_LITERAL_STRING("type"),
 395                         NS_LITERAL_STRING("text/css"));
 396
 397           AddAttrToNode(theNode, theAllocator,
 398                         NS_LITERAL_STRING("href"),
 399                         NS_LITERAL_STRING("resource://gre/res/viewsource.css"));
 400
 401           mSink->AddLeaf(theNode);
 402         }
 403         IF_FREE(theToken, theAllocator);
 404       }
 405
 406       result = mSink->CloseContainer(eHTMLTag_head);
 407       if(NS_SUCCEEDED(result)) {
 408         mHasOpenRoot = PR_TRUE;
 409       }
 410     }
 411     if (NS_SUCCEEDED(result) && !mHasOpenBody) {
 412       if (theAllocator) {
 413         CStartToken* bodyToken=
 414           static_cast<CStartToken*>
 415                      (theAllocator->CreateTokenOfType(eToken_start,
 416                                                          eHTMLTag_body,
 417                                                          NS_LITERAL_STRING("BODY")));
 418         if (bodyToken) {
 419           nsCParserStartNode bodyNode(bodyToken, theAllocator);
 420
 421           AddAttrToNode(bodyNode, theAllocator,
 422                         NS_LITERAL_STRING("id"),
 423                         NS_ConvertASCIItoUTF16(kBodyId));
 424
 425           if (mWrapLongLines) {
 426             AddAttrToNode(bodyNode, theAllocator,
 427                           NS_LITERAL_STRING("class"),
 428                           NS_ConvertASCIItoUTF16(kBodyClassWrap));
 429           }
 430           result = mSink->OpenContainer(bodyNode);
 431           if(NS_SUCCEEDED(result)) mHasOpenBody=PR_TRUE;
 432         }
 433         IF_FREE(bodyToken, theAllocator);
 434
 435         if (NS_SUCCEEDED(result)) {
 436           CStartToken* preToken =
 437             static_cast<CStartToken*>
 438                        (theAllocator->CreateTokenOfType(eToken_start,
 439                                                            eHTMLTag_pre,
 440                                                            NS_LITERAL_STRING("PRE")));
 441           if (preToken) {
 442             nsCParserStartNode preNode(preToken, theAllocator);
 443             AddAttrToNode(preNode, theAllocator,
 444                           NS_LITERAL_STRING("id"),
 445                           NS_LITERAL_STRING("line1"));
 446             result = mSink->OpenContainer(preNode);
 447           } else {
 448             result = NS_ERROR_OUT_OF_MEMORY;
 449           }
 450           IF_FREE(preToken, theAllocator);
 451         }
 452       }
 453     }
 454
 455     while(NS_SUCCEEDED(result)){
 456       CToken* theToken=mTokenizer->PopToken();
 457       if(theToken) {
 458         result=HandleToken(theToken,aParser);
 459         if(NS_SUCCEEDED(result)) {
 460           IF_FREE(theToken, mTokenizer->GetTokenAllocator());
 461           if (mParser->CanInterrupt() &&
 462               mSink->DidProcessAToken() == NS_ERROR_HTMLPARSER_INTERRUPTED) {
 463             result = NS_ERROR_HTMLPARSER_INTERRUPTED;
 464             break;
 465           }
 466         } else {
 467           mTokenizer->PushTokenFront(theToken);
 468         }
 469       }
 470       else break;
 471     }//while
 472
 473     mTokenizer=oldTokenizer;
 474   }
 475   else result=NS_ERROR_HTMLPARSER_BADTOKENIZER;
 476   return result;
 477 }
 478
 479 /**
 480  * Call this to start a new PRE block.  See bug 86355 for why this
 481  * makes some pages much faster.
 482  */
 483 void CViewSourceHTML::StartNewPreBlock(void){
 484   CEndToken endToken(eHTMLTag_pre);
 485   nsCParserNode endNode(&endToken, 0/*stack token*/);
 486   mSink->CloseContainer(eHTMLTag_pre);
 487
 488   nsTokenAllocator* theAllocator = mTokenizer->GetTokenAllocator();
 489   if (!theAllocator) {
 490     return;
 491   }
 492
 493   CStartToken* theToken =
 494     static_cast<CStartToken*>
 495                (theAllocator->CreateTokenOfType(eToken_start,
 496                                                    eHTMLTag_pre,
 497                                                    NS_LITERAL_STRING("PRE")));
 498   if (!theToken) {
 499     return;
 500   }
 501
 502   nsCParserStartNode startNode(theToken, theAllocator);
 503   AddAttrToNode(startNode, theAllocator,
 504                 NS_LITERAL_STRING("id"),
 505                 NS_ConvertASCIItoUTF16(nsPrintfCString("line%d", mLineNumber)));
 506   mSink->OpenContainer(startNode);
 507   IF_FREE(theToken, theAllocator);
 508
 509 #ifdef DUMP_TO_FILE
 510   if (gDumpFile) {
 511     fprintf(gDumpFile, "</pre>\n");
 512     fprintf(gDumpFile, "<pre id=\"line%d\">\n", mLineNumber);
 513   }
 514 #endif // DUMP_TO_FILE
 515
 516   mTokenCount = 0;
 517 }
 518
 519 void CViewSourceHTML::AddAttrToNode(nsCParserStartNode& aNode,
 520                                     nsTokenAllocator* aAllocator,
 521                                     const nsAString& aAttrName,
 522                                     const nsAString& aAttrValue)
 523 {
 524   NS_PRECONDITION(aAllocator, "Must have a token allocator!");
 525
 526   CAttributeToken* theAttr =
 527     (CAttributeToken*) aAllocator->CreateTokenOfType(eToken_attribute,
 528                                                      eHTMLTag_unknown,
 529                                                      aAttrValue);
 530   if (!theAttr) {
 531     NS_ERROR("Failed to allocate attribute token");
 532     return;
 533   }
 534
 535   theAttr->SetKey(aAttrName);
 536   aNode.AddAttribute(theAttr);
 537
 538   // Parser nodes assume that they are being handed a ref when AddAttribute is
 539   // called, unlike Init() and construction, when they actually addref the
 540   // incoming token.  Do NOT release here unless this setup changes.
 541 }
 542
 543 /**
 544  *
 545  * @update  gess5/18/98
 546  * @param
 547  * @return
 548  */
 549 NS_IMETHODIMP CViewSourceHTML::DidBuildModel(nsresult anErrorCode,PRBool aNotifySink,nsIParser* aParser,nsIContentSink* aSink){
 550   nsresult result= NS_OK;
 551
 552   //ADD CODE HERE TO CLOSE OPEN CONTAINERS...
 553
 554   if(aParser){
 555
 556     mParser=(nsParser*)aParser;  //debug XXX
 557     STOP_TIMER();
 558
 559     mSink=(nsIHTMLContentSink*)aParser->GetContentSink();
 560     if((aNotifySink) && (mSink)) {
 561         //now let's close automatically auto-opened containers...
 562
 563 #ifdef DUMP_TO_FILE
 564       if(gDumpFile) {
 565         fprintf(gDumpFile, "</pre>\n");
 566         fprintf(gDumpFile, "</body>\n");
 567         fprintf(gDumpFile, "</html>\n");
 568         fclose(gDumpFile);
 569       }
 570 #endif // DUMP_TO_FILE
 571
 572       if(ePlainText!=mDocType) {
 573         mSink->CloseContainer(eHTMLTag_pre);
 574         mSink->CloseContainer(eHTMLTag_body);
 575         mSink->CloseContainer(eHTMLTag_html);
 576       }
 577       result = mSink->DidBuildModel();
 578     }
 579
 580     START_TIMER();
 581
 582   }
 583
 584 #ifdef RAPTOR_PERF_METRICS
 585   NS_STOP_STOPWATCH(vsTimer);
 586   printf("viewsource timer: ");
 587   vsTimer.Print();
 588   printf("\n");
 589 #endif
 590
 591   return result;
 592 }
 593
 594 /**
 595  * Use this id you want to stop the building content model
 596  * --------------[ Sets DTD to STOP mode ]----------------
 597  * It's recommended to use this method in accordance with
 598  * the parser's terminate() method.
 599  *
 600  * @update  harishd 07/22/99
 601  * @param
 602  * @return
 603  */
 604 NS_IMETHODIMP_(void)
 605 CViewSourceHTML::Terminate() {
 606 }
 607
 608 NS_IMETHODIMP_(PRInt32)
 609 CViewSourceHTML::GetType() {
 610   return NS_IPARSER_FLAG_HTML;
 611 }
 612
 613 /**
 614  *
 615  * @update  gess5/18/98
 616  * @param
 617  * @return
 618  */
 619 NS_IMETHODIMP CViewSourceHTML::WillResumeParse(nsIContentSink* aSink){
 620   nsresult result = NS_OK;
 621   if(mSink) {
 622     result = mSink->WillResume();
 623   }
 624   return result;
 625 }
 626
 627 /**
 628  *
 629  * @update  gess5/18/98
 630  * @param
 631  * @return
 632  */
 633 NS_IMETHODIMP CViewSourceHTML::WillInterruptParse(nsIContentSink* aSink){
 634   nsresult result = NS_OK;
 635   if(mSink) {
 636     result = mSink->WillInterrupt();
 637   }
 638   return result;
 639 }
 640
 641 /**
 642  * Called by the parser to enable/disable dtd verification of the
 643  * internal context stack.
 644  * @update  gess 7/23/98
 645  * @param
 646  * @return
 647  */
 648 void CViewSourceHTML::SetVerification(PRBool aEnabled)
 649 {
 650 }
 651
 652 /**
 653  *  This method is called to determine whether or not a tag
 654  *  of one type can contain a tag of another type.
 655  *
 656  *  @update  gess 3/25/98
 657  *  @param   aParent -- int tag of parent container
 658  *  @param   aChild -- int tag of child container
 659  *  @return  PR_TRUE if parent can contain child
 660  */
 661 PRBool CViewSourceHTML::CanContain(PRInt32 aParent,PRInt32 aChild) const{
 662   PRBool result=PR_TRUE;
 663   return result;
 664 }
 665
 666 /**
 667  *  This method gets called to determine whether a given
 668  *  tag is itself a container
 669  *
 670  *  @update  gess 3/25/98
 671  *  @param   aTag -- tag to test for containership
 672  *  @return  PR_TRUE if given tag can contain other tags
 673  */
 674 PRBool CViewSourceHTML::IsContainer(PRInt32 aTag) const{
 675   PRBool result=PR_TRUE;
 676   return result;
 677 }
 678
 679 /**
 680  *  This method gets called when a tag needs to write it's attributes
 681  *
 682  *  @update  gess 3/25/98
 683  *  @param
 684  *  @return  result status
 685  */
 686 nsresult CViewSourceHTML::WriteAttributes(const nsAString& tagName,
 687                                           nsTokenAllocator* allocator,
 688                                           PRInt32 attrCount, PRBool aOwnerInError) {
 689   nsresult result=NS_OK;
 690
 691   if(attrCount){ //go collect the attributes...
 692     int attr = 0;
 693     for(attr = 0; attr < attrCount; ++attr){
 694       CToken* theToken = mTokenizer->PeekToken();
 695       if(theToken)  {
 696         eHTMLTokenTypes theType = eHTMLTokenTypes(theToken->GetTokenType());
 697         if(eToken_attribute == theType){
 698           mTokenizer->PopToken(); //pop it for real...
 699           mTokenNode.AddAttribute(theToken);  //and add it to the node.
 700
 701           CAttributeToken* theAttrToken = (CAttributeToken*)theToken;
 702           const nsSubstring& theKey = theAttrToken->GetKey();
 703
 704           // The attribute is only in error if its owner is NOT in error.
 705           const PRBool attributeInError =
 706             !aOwnerInError && theAttrToken->IsInError();
 707
 708           result = WriteTag(kAttributeName,theKey,0,attributeInError);
 709           const nsSubstring& theValue = theAttrToken->GetValue();
 710
 711           if(!theValue.IsEmpty() || theAttrToken->mHasEqualWithoutValue){
 712             if (IsUrlAttribute(tagName, theKey, theValue)) {
 713               WriteHrefAttribute(allocator, theValue);
 714             } else {
 715               WriteTag(kAttributeValue,theValue,0,attributeInError);
 716             }
 717           }
 718         }
 719       }
 720       else return kEOF;
 721     }
 722   }
 723
 724   return result;
 725 }
 726
 727 /**
 728  *  This method gets called when a tag needs to be sent out
 729  *
 730  *  @update  gess 3/25/98
 731  *  @param
 732  *  @return  result status
 733  */
 734 nsresult CViewSourceHTML::WriteTag(PRInt32 aTagType,const nsSubstring & aText,PRInt32 attrCount,PRBool aTagInError) {
 735   nsresult result=NS_OK;
 736
 737   // adjust line number to what it will be after we finish writing this tag
 738   // XXXbz life here sucks.  We can't use the GetNewlineCount on the token,
 739   // because Text tokens in <style>, <script>, etc lie through their teeth.
 740   // On the other hand, the parser messes up newline counting in some token
 741   // types (bug 137315).  So our line numbers will disagree with the parser's
 742   // in some cases...
 743   mLineNumber += aText.CountChar(PRUnichar('\n'));
 744
 745   nsTokenAllocator* theAllocator=mTokenizer->GetTokenAllocator();
 746   NS_ASSERTION(0!=theAllocator,"Error: no allocator");
 747   if(0==theAllocator)
 748     return NS_ERROR_FAILURE;
 749
 750   // Highlight all parts of all erroneous tags.
 751   if (mSyntaxHighlight && aTagInError) {
 752     CStartToken* theTagToken=
 753       static_cast<CStartToken*>
 754                  (theAllocator->CreateTokenOfType(eToken_start,
 755                                                      eHTMLTag_span,
 756                                                      NS_LITERAL_STRING("SPAN")));
 757     NS_ENSURE_TRUE(theTagToken, NS_ERROR_OUT_OF_MEMORY);
 758     mErrorNode.Init(theTagToken, theAllocator);
 759     AddAttrToNode(mErrorNode, theAllocator,
 760                   NS_LITERAL_STRING("class"),
 761                   NS_LITERAL_STRING("error"));
 762     mSink->OpenContainer(mErrorNode);
 763     IF_FREE(theTagToken, theAllocator);
 764 #ifdef DUMP_TO_FILE
 765     if (gDumpFile) {
 766       fprintf(gDumpFile, "<span class=\"error\">");
 767     }
 768 #endif
 769   }
 770
 771   if (kBeforeText[aTagType][0] != 0) {
 772     NS_ConvertASCIItoUTF16 beforeText(kBeforeText[aTagType]);
 773     mITextToken.SetIndirectString(beforeText);
 774     nsCParserNode theNode(&mITextToken, 0/*stack token*/);
 775     mSink->AddLeaf(theNode);
 776   }
 777 #ifdef DUMP_TO_FILE
 778   if (gDumpFile && kDumpFileBeforeText[aTagType][0])
 779     fprintf(gDumpFile, kDumpFileBeforeText[aTagType]);
 780 #endif // DUMP_TO_FILE
 781
 782   if (mSyntaxHighlight && aTagType != kText) {
 783     CStartToken* theTagToken=
 784       static_cast<CStartToken*>
 785                  (theAllocator->CreateTokenOfType(eToken_start,
 786                                                      eHTMLTag_span,
 787                                                      NS_LITERAL_STRING("SPAN")));
 788     NS_ENSURE_TRUE(theTagToken, NS_ERROR_OUT_OF_MEMORY);
 789     mStartNode.Init(theTagToken, theAllocator);
 790     AddAttrToNode(mStartNode, theAllocator,
 791                   NS_LITERAL_STRING("class"),
 792                   NS_ConvertASCIItoUTF16(kElementClasses[aTagType]));
 793     mSink->OpenContainer(mStartNode);  //emit <starttag>...
 794     IF_FREE(theTagToken, theAllocator);
 795 #ifdef DUMP_TO_FILE
 796     if (gDumpFile) {
 797       fprintf(gDumpFile, "<span class=\"");
 798       fprintf(gDumpFile, kElementClasses[aTagType]);
 799       fprintf(gDumpFile, "\">");
 800     }
 801 #endif // DUMP_TO_FILE
 802   }
 803
 804   STOP_TIMER();
 805
 806   mITextToken.SetIndirectString(aText);  //now emit the tag name...
 807
 808   nsCParserNode theNode(&mITextToken, 0/*stack token*/);
 809   mSink->AddLeaf(theNode);
 810 #ifdef DUMP_TO_FILE
 811   if (gDumpFile) {
 812     fputs(NS_ConvertUTF16toUTF8(aText).get(), gDumpFile);
 813   }
 814 #endif // DUMP_TO_FILE
 815
 816   if (mSyntaxHighlight && aTagType != kText) {
 817     mStartNode.ReleaseAll();
 818     mSink->CloseContainer(eHTMLTag_span);  //emit </endtag>...
 819 #ifdef DUMP_TO_FILE
 820     if (gDumpFile)
 821       fprintf(gDumpFile, "</span>");
 822 #endif //DUMP_TO_FILE
 823   }
 824
 825   if(attrCount){
 826     result=WriteAttributes(aText, theAllocator, attrCount, aTagInError);
 827   }
 828
 829   // Tokens are set in error if their ending > is not there, so don't output
 830   // the after-text
 831   if (!aTagInError && kAfterText[aTagType][0] != 0) {
 832     NS_ConvertASCIItoUTF16 afterText(kAfterText[aTagType]);
 833     mITextToken.SetIndirectString(afterText);
 834     nsCParserNode theNode(&mITextToken, 0/*stack token*/);
 835     mSink->AddLeaf(theNode);
 836   }
 837 #ifdef DUMP_TO_FILE
 838   if (!aTagInError && gDumpFile && kDumpFileAfterText[aTagType][0])
 839     fprintf(gDumpFile, kDumpFileAfterText[aTagType]);
 840 #endif // DUMP_TO_FILE
 841
 842   if (mSyntaxHighlight && aTagInError) {
 843     mErrorNode.ReleaseAll();
 844     mSink->CloseContainer(eHTMLTag_span);  //emit </endtag>...
 845 #ifdef DUMP_TO_FILE
 846     if (gDumpFile)
 847       fprintf(gDumpFile, "</span>");
 848 #endif //DUMP_TO_FILE
 849   }
 850
 851   START_TIMER();
 852
 853   return result;
 854 }
 855
 856 /**
 857  *
 858  *  @update  gess 3/25/98
 859  *  @param   aToken -- token object to be put into content model
 860  *  @return  0 if all is well; non-zero is an error
 861  */
 862 NS_IMETHODIMP CViewSourceHTML::HandleToken(CToken* aToken,nsIParser* aParser)
 863 {
 864   nsresult        result=NS_OK;
 865   CHTMLToken*     theToken= (CHTMLToken*)(aToken);
 866   eHTMLTokenTypes theType= (eHTMLTokenTypes)theToken->GetTokenType();
 867
 868   mParser=(nsParser*)aParser;
 869   mSink=(nsIHTMLContentSink*)aParser->GetContentSink();
 870
 871   mTokenNode.Init(theToken, mTokenizer->GetTokenAllocator());
 872
 873   switch(theType) {
 874
 875     case eToken_start:
 876       {
 877         const nsSubstring& startValue = aToken->GetStringValue();
 878         result = WriteTag(kStartTag,startValue,aToken->GetAttributeCount(),aToken->IsInError());
 879
 880         if((ePlainText!=mDocType) && mParser && (NS_OK==result)) {
 881           result = mSink->NotifyTagObservers(&mTokenNode);
 882         }
 883       }
 884       break;
 885
 886     case eToken_end:
 887       {
 888         const nsSubstring& endValue = aToken->GetStringValue();
 889         result = WriteTag(kEndTag,endValue,aToken->GetAttributeCount(),aToken->IsInError());
 890       }
 891       break;
 892
 893     case eToken_cdatasection:
 894       {
 895         nsAutoString theStr;
 896         theStr.AssignLiteral("<!");
 897         theStr.Append(aToken->GetStringValue());
 898         if (!aToken->IsInError()) {
 899           theStr.AppendLiteral(">");
 900         }
 901         result=WriteTag(kCData,theStr,0,aToken->IsInError());
 902       }
 903       break;
 904
 905     case eToken_markupDecl:
 906       {
 907         nsAutoString theStr;
 908         theStr.AssignLiteral("<!");
 909         theStr.Append(aToken->GetStringValue());
 910         if (!aToken->IsInError()) {
 911           theStr.AppendLiteral(">");
 912         }
 913         result=WriteTag(kMarkupDecl,theStr,0,aToken->IsInError());
 914       }
 915       break;
 916
 917     case eToken_comment:
 918       {
 919         nsAutoString theStr;
 920         aToken->AppendSourceTo(theStr);
 921         result=WriteTag(kComment,theStr,0,aToken->IsInError());
 922       }
 923       break;
 924
 925     case eToken_doctypeDecl:
 926       {
 927         const nsSubstring& doctypeValue = aToken->GetStringValue();
 928         result=WriteTag(kDoctype,doctypeValue,0,aToken->IsInError());
 929       }
 930       break;
 931
 932     case eToken_newline:
 933       {
 934         const nsSubstring& newlineValue = aToken->GetStringValue();
 935         result=WriteTag(kText,newlineValue,0,PR_FALSE);
 936         ++mTokenCount;
 937         if (NS_VIEWSOURCE_TOKENS_PER_BLOCK > 0 &&
 938             mTokenCount > NS_VIEWSOURCE_TOKENS_PER_BLOCK) {
 939           StartNewPreBlock();
 940         }
 941       }
 942       break;
 943
 944     case eToken_whitespace:
 945       {
 946         const nsSubstring& wsValue = aToken->GetStringValue();
 947         result=WriteTag(kText,wsValue,0,PR_FALSE);
 948         ++mTokenCount;
 949         if (NS_VIEWSOURCE_TOKENS_PER_BLOCK > 0 &&
 950             mTokenCount > NS_VIEWSOURCE_TOKENS_PER_BLOCK &&
 951             !wsValue.IsEmpty()) {
 952           PRUnichar ch = wsValue.Last();
 953           if (ch == kLF || ch == kCR)
 954             StartNewPreBlock();
 955         }
 956       }
 957       break;
 958
 959     case eToken_text:
 960       {
 961         const nsSubstring& str = aToken->GetStringValue();
 962         result=WriteTag(kText,str,aToken->GetAttributeCount(),aToken->IsInError());
 963         ++mTokenCount;
 964         if (NS_VIEWSOURCE_TOKENS_PER_BLOCK > 0 &&
 965             mTokenCount > NS_VIEWSOURCE_TOKENS_PER_BLOCK && !str.IsEmpty()) {
 966           PRUnichar ch = str.Last();
 967           if (ch == kLF || ch == kCR)
 968             StartNewPreBlock();
 969         }
 970       }
 971
 972       break;
 973
 974     case eToken_entity:
 975       result=WriteTag(kEntity,aToken->GetStringValue(),0,aToken->IsInError());
 976       break;
 977
 978     case eToken_instruction:
 979       result=WriteTag(kPI,aToken->GetStringValue(),0,aToken->IsInError());
 980       break;
 981
 982     default:
 983       result=NS_OK;
 984   }//switch
 985
 986   mTokenNode.ReleaseAll();
 987
 988   return result;
 989 }
 990
 991 PRBool CViewSourceHTML::IsUrlAttribute(const nsAString& tagName,
 992                                        const nsAString& attrName,
 993                                        const nsAString& attrValue) {
 994   const nsSubstring &trimmedAttrName = TrimTokenValue(attrName);
 995
 996   PRBool isHref = trimmedAttrName.LowerCaseEqualsLiteral("href");
 997   PRBool isSrc = !isHref && trimmedAttrName.LowerCaseEqualsLiteral("src");
 998
 999   // If this is the HREF attribute of a BASE element, then update the base URI.
1000   // This doesn't feel like the ideal place for this, but the alternatives don't
1001   // seem all that nice either.
1002   if (isHref && tagName.LowerCaseEqualsLiteral("base")) {
1003     const nsSubstring& baseSpec = TrimTokenValue(attrValue);
1004     SetBaseURI(baseSpec);
1005   }
1006
1007   return isHref || isSrc;
1008 }
1009
1010 void CViewSourceHTML::WriteHrefAttribute(nsTokenAllocator* allocator,
1011                                          const nsAString& href) {
1012   // The "href" will typically contain not only the href proper, but the single
1013   // or double quotes and often some surrounding whitespace as well.  Find the
1014   // location of the href proper inside the string.
1015   nsAString::const_iterator startProper, endProper;
1016   href.BeginReading(startProper);
1017   href.EndReading(endProper);
1018   TrimTokenValue(startProper, endProper);
1019
1020   // Break the href into three parts, the preceding text, the href proper, and
1021   // the succeeding text.
1022   nsAString::const_iterator start, end;
1023   href.BeginReading(start);
1024   href.EndReading(end);
1025   const nsAString &precedingText = Substring(start, startProper);
1026   const nsAString &hrefProper = Substring(startProper, endProper);
1027   const nsAString &succeedingText = Substring(endProper, end);
1028
1029   nsAutoString fullPrecedingText;
1030   fullPrecedingText.Assign(kEqual);
1031   fullPrecedingText.Append(precedingText);
1032
1033   // Regular URLs and view-source URLs work the same way for .js and .css files.
1034   // However, if the user follows a link in the view source window to a .html
1035   // file (i.e. the HREF in an A tag), then presumably they will expect to see
1036   // the *source* of that new page, not the rendered version.  So for now we
1037   // just slap a "view-source:" at the beginning of each URL.  There are two
1038   // big downsides to doing it this way -- we must make relative URLs into
1039   // absolute URLs before we can turn them into view-source URLs, and links
1040   // to images don't work right -- nobody wants to see the bytes constituting a
1041   // PNG rendered as text.  A smarter view-source handler might be able to deal
1042   // with the latter problem.
1043
1044   // Construct a "view-source" URL for the HREF.
1045   nsAutoString viewSourceUrl;
1046   CreateViewSourceURL(hrefProper, viewSourceUrl);
1047
1048   // Construct the HTML that will represent the HREF.
1049   NS_NAMED_LITERAL_STRING(HREF, "href");
1050   if (fullPrecedingText.Length() > 0) {
1051     WriteTextInSpan(fullPrecedingText, allocator, EmptyString(), EmptyString());
1052   }
1053   WriteTextInAnchor(hrefProper, allocator, HREF, viewSourceUrl);
1054   if (succeedingText.Length() > 0) {
1055     WriteTextInSpan(succeedingText, allocator, EmptyString(), EmptyString());
1056   }
1057 }
1058
1059 nsresult CViewSourceHTML::CreateViewSourceURL(const nsAString& linkUrl,
1060                                               nsString& viewSourceUrl) {
1061   nsCOMPtr<nsIURI> baseURI;
1062   nsCOMPtr<nsIURI> hrefURI;
1063   nsresult rv;
1064
1065   // Default the view source URL to the empty string in case we fail.  Links
1066   // with empty HREFs are essentially non-functional, at least as of Firefox
1067   // 3.03.  This is preferrable behavior to links that look good but then 404.
1068   viewSourceUrl.Truncate();
1069
1070   // Get the character set.
1071   nsCString charset;
1072   PRInt32 source;
1073   mParser->GetDocumentCharset(charset, source);
1074
1075   // Get the BaseURI.
1076   rv = GetBaseURI(getter_AddRefs(baseURI));
1077   NS_ENSURE_SUCCESS(rv, rv);
1078
1079   // Use the link URL and the base URI to build a URI for the link.
1080   rv = NS_NewURI(getter_AddRefs(hrefURI), linkUrl, charset.get(), baseURI);
1081   NS_ENSURE_SUCCESS(rv, rv);
1082
1083   // Get the absolute URL from the link URI.
1084   nsCString absoluteLinkUrl;
1085   hrefURI->GetSpec(absoluteLinkUrl);
1086
1087   // Prepend "view-source:" onto the absolute URL and store it in the out param.
1088   viewSourceUrl.AssignLiteral("view-source:");
1089   viewSourceUrl.AppendWithConversion(absoluteLinkUrl);
1090
1091   return NS_OK;
1092 }
1093
1094 void CViewSourceHTML::WriteTextInSpan(const nsAString& text,
1095                                       nsTokenAllocator* allocator,
1096                                       const nsAString& attrName,
1097                                       const nsAString& attrValue) {
1098   NS_NAMED_LITERAL_STRING(SPAN, "SPAN");
1099   WriteTextInElement(SPAN, eHTMLTag_span, text, allocator, attrName, attrValue);
1100 }
1101
1102 void CViewSourceHTML::WriteTextInAnchor(const nsAString& text,
1103                                         nsTokenAllocator* allocator,
1104                                         const nsAString& attrName,
1105                                         const nsAString& attrValue) {
1106   NS_NAMED_LITERAL_STRING(ANCHOR, "A");
1107   WriteTextInElement(ANCHOR, eHTMLTag_a, text, allocator, attrName, attrValue);
1108 }
1109
1110 void CViewSourceHTML::WriteTextInElement(const nsAString& tagName,
1111                                          eHTMLTags tagType, const nsAString& text,
1112                                          nsTokenAllocator* allocator,
1113                                          const nsAString& attrName,
1114                                          const nsAString& attrValue) {
1115   // Open the element, supplying the attribute, if any.
1116   nsTokenAllocator* theAllocator = mTokenizer->GetTokenAllocator();
1117   if (!theAllocator) {
1118     return;
1119   }
1120
1121   CStartToken* startToken =
1122     static_cast<CStartToken*>
1123       (theAllocator->CreateTokenOfType(eToken_start, tagType, tagName));
1124   if (!startToken) {
1125     return;
1126   }
1127
1128   nsCParserStartNode startNode(startToken, theAllocator);
1129   if (!attrName.IsEmpty()) {
1130     AddAttrToNode(startNode, allocator, attrName, attrValue);
1131   }
1132   mSink->OpenContainer(startNode);
1133
1134   // Add the text node.
1135   CTextToken textToken(text);
1136   nsCParserNode textNode(&textToken, 0/*stack token*/);
1137   mSink->AddLeaf(textNode);
1138
1139   // Close the element.
1140   mSink->CloseContainer(tagType);
1141 }
1142
1143 const nsDependentSubstring CViewSourceHTML::TrimTokenValue(const nsAString& tokenValue) {
1144   nsAString::const_iterator start, end;
1145   tokenValue.BeginReading(start);
1146   tokenValue.EndReading(end);
1147   TrimTokenValue(start, end);
1148   return Substring(start, end);
1149 }
1150
1151 void CViewSourceHTML::TrimTokenValue(nsAString::const_iterator& start,
1152                                      nsAString::const_iterator& end) {
1153   // Token values -- tag names, attribute names, and attribute values --
1154   // generally contain adjacent whitespace and, in the case of attribute values,
1155   // the surrounding double or single quotes.  Return a new string with this
1156   // adjacent text stripped off, so only the value proper remains.
1157
1158   // Skip past any whitespace or quotes on the left.
1159   while (start != end) {
1160     if (!IsTokenValueTrimmableCharacter(*start)) break;
1161     ++start;
1162   }
1163
1164   // Skip past any whitespace or quotes on the right.  Note that the interval
1165   // start..end is half-open.  That means the last character of the interval is
1166   // at *(end - 1).
1167   while (end != start) {
1168     --end;
1169     if (!IsTokenValueTrimmableCharacter(*end)) {
1170       ++end; // we've actually gone one too far at this point, so adjust.
1171       break;
1172     }
1173   }
1174 }
1175
1176 PRBool CViewSourceHTML::IsTokenValueTrimmableCharacter(PRUnichar ch) {
1177   if (ch == ' ') return PR_TRUE;
1178   if (ch == '\t') return PR_TRUE;
1179   if (ch == '\r') return PR_TRUE;
1180   if (ch == '\n') return PR_TRUE;
1181   if (ch == '\'') return PR_TRUE;
1182   if (ch == '"') return PR_TRUE;
1183   return PR_FALSE;
1184 }
1185
1186 nsresult CViewSourceHTML::GetBaseURI(nsIURI **result) {
1187   nsresult rv = NS_OK;
1188   if (!mBaseURI) {
1189     rv = SetBaseURI(mFilename);
1190   }
1191   NS_IF_ADDREF(*result = mBaseURI);
1192   return rv;
1193 }
1194
1195 nsresult CViewSourceHTML::SetBaseURI(const nsAString& baseSpec) {
1196   // Get the character set.
1197   nsCString charset;
1198   PRInt32 source;
1199   mParser->GetDocumentCharset(charset, source);
1200
1201   // Create a new base URI and store it in mBaseURI.
1202   nsCOMPtr<nsIURI> baseURI;
1203   nsresult rv = NS_NewURI(getter_AddRefs(baseURI), baseSpec, charset.get());
1204   NS_ENSURE_SUCCESS(rv, rv);
1205   mBaseURI = baseURI;
1206   return NS_OK;
1207 }