layout/style/nsCSSScanner.cpp

   1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* ***** BEGIN LICENSE BLOCK *****
   3  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
   4  *
   5  * The contents of this file are subject to the Mozilla Public License Version
   6  * 1.1 (the "License"); you may not use this file except in compliance with
   7  * the License. You may obtain a copy of the License at
   8  * http://www.mozilla.org/MPL/
   9  *
  10  * Software distributed under the License is distributed on an "AS IS" basis,
  11  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  12  * for the specific language governing rights and limitations under the
  13  * License.
  14  *
  15  * The Original Code is mozilla.org code.
  16  *
  17  * The Initial Developer of the Original Code is
  18  * Netscape Communications Corporation.
  19  * Portions created by the Initial Developer are Copyright (C) 1998
  20  * the Initial Developer. All Rights Reserved.
  21  *
  22  * Contributor(s):
  23  *   L. David Baron <dbaron@dbaron.org>
  24  *   Daniel Glazman <glazman@netscape.com>
  25  *
  26  * Alternatively, the contents of this file may be used under the terms of
  27  * either of the GNU General Public License Version 2 or later (the "GPL"),
  28  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  29  * in which case the provisions of the GPL or the LGPL are applicable instead
  30  * of those above. If you wish to allow use of your version of this file only
  31  * under the terms of either the GPL or the LGPL, and not to allow others to
  32  * use your version of this file under the terms of the MPL, indicate your
  33  * decision by deleting the provisions above and replace them with the notice
  34  * and other provisions required by the GPL or the LGPL. If you do not delete
  35  * the provisions above, a recipient may use your version of this file under
  36  * the terms of any one of the MPL, the GPL or the LGPL.
  37  *
  38  * ***** END LICENSE BLOCK ***** */
  39
  40 /* tokenization of CSS style sheets */
  41
  42 #include "nsCSSScanner.h"
  43 #include "nsIFactory.h"
  44 #include "nsIInputStream.h"
  45 #include "nsIUnicharInputStream.h"
  46 #include "nsString.h"
  47 #include "nsCRT.h"
  48
  49 // for #ifdef CSS_REPORT_PARSE_ERRORS
  50 #include "nsCOMPtr.h"
  51 #include "nsIServiceManager.h"
  52 #include "nsIComponentManager.h"
  53 #include "nsReadableUtils.h"
  54 #include "nsIURI.h"
  55 #include "nsIConsoleService.h"
  56 #include "nsIScriptError.h"
  57 #include "nsIStringBundle.h"
  58 #include "nsContentUtils.h"
  59
  60 // Don't bother collecting whitespace characters in token's mIdent buffer
  61 #undef COLLECT_WHITESPACE
  62
  63 static const PRUnichar CSS_ESCAPE = PRUnichar('\\');
  64 const PRUint8 nsCSSScanner::IS_DIGIT = 0x01;
  65 const PRUint8 nsCSSScanner::IS_HEX_DIGIT = 0x02;
  66 const PRUint8 nsCSSScanner::START_IDENT = 0x04;
  67 const PRUint8 nsCSSScanner::IS_IDENT = 0x08;
  68 const PRUint8 nsCSSScanner::IS_WHITESPACE = 0x10;
  69
  70 static PRBool gLexTableSetup = PR_FALSE;
  71 PRUint8 nsCSSScanner::gLexTable[256];
  72
  73 #ifdef CSS_REPORT_PARSE_ERRORS
  74 static PRBool gReportErrors = PR_TRUE;
  75 static nsIConsoleService *gConsoleService;
  76 static nsIFactory *gScriptErrorFactory;
  77 static nsIStringBundle *gStringBundle;
  78 #endif
  79
  80 /* static */
  81 void
  82 nsCSSScanner::BuildLexTable()
  83 {
  84   gLexTableSetup = PR_TRUE;
  85
  86   PRUint8* lt = gLexTable;
  87   int i;
  88   lt[CSS_ESCAPE] = START_IDENT;
  89   lt['-'] |= IS_IDENT;
  90   lt['_'] |= IS_IDENT | START_IDENT;
  91   lt[' '] |= IS_WHITESPACE;   // space
  92   lt['\t'] |= IS_WHITESPACE;  // horizontal tab
  93   lt['\r'] |= IS_WHITESPACE;  // carriage return
  94   lt['\n'] |= IS_WHITESPACE;  // line feed
  95   lt['\f'] |= IS_WHITESPACE;  // form feed
  96   for (i = 161; i <= 255; i++) {
  97     lt[i] |= IS_IDENT | START_IDENT;
  98   }
  99   for (i = '0'; i <= '9'; i++) {
 100     lt[i] |= IS_DIGIT | IS_HEX_DIGIT | IS_IDENT;
 101   }
 102   for (i = 'A'; i <= 'Z'; i++) {
 103     if ((i >= 'A') && (i <= 'F')) {
 104       lt[i] |= IS_HEX_DIGIT;
 105       lt[i+32] |= IS_HEX_DIGIT;
 106     }
 107     lt[i] |= IS_IDENT | START_IDENT;
 108     lt[i+32] |= IS_IDENT | START_IDENT;
 109   }
 110 }
 111
 112 nsCSSToken::nsCSSToken()
 113 {
 114   mType = eCSSToken_Symbol;
 115 }
 116
 117 void
 118 nsCSSToken::AppendToString(nsString& aBuffer)
 119 {
 120   switch (mType) {
 121     case eCSSToken_AtKeyword:
 122       aBuffer.Append(PRUnichar('@')); // fall through intentional
 123     case eCSSToken_Ident:
 124     case eCSSToken_WhiteSpace:
 125     case eCSSToken_Function:
 126     case eCSSToken_URL:
 127     case eCSSToken_InvalidURL:
 128     case eCSSToken_HTMLComment:
 129       aBuffer.Append(mIdent);
 130       break;
 131     case eCSSToken_Number:
 132       if (mIntegerValid) {
 133         aBuffer.AppendInt(mInteger, 10);
 134       }
 135       else {
 136         aBuffer.AppendFloat(mNumber);
 137       }
 138       break;
 139     case eCSSToken_Percentage:
 140       NS_ASSERTION(!mIntegerValid, "How did a percentage token get this set?");
 141       aBuffer.AppendFloat(mNumber * 100.0f);
 142       aBuffer.Append(PRUnichar('%')); // STRING USE WARNING: technically, this should be |AppendWithConversion|
 143       break;
 144     case eCSSToken_Dimension:
 145       if (mIntegerValid) {
 146         aBuffer.AppendInt(mInteger, 10);
 147       }
 148       else {
 149         aBuffer.AppendFloat(mNumber);
 150       }
 151       aBuffer.Append(mIdent);
 152       break;
 153     case eCSSToken_String:
 154       aBuffer.Append(mSymbol);
 155       aBuffer.Append(mIdent); // fall through intentional
 156     case eCSSToken_Symbol:
 157       aBuffer.Append(mSymbol);
 158       break;
 159     case eCSSToken_ID:
 160     case eCSSToken_Ref:
 161       aBuffer.Append(PRUnichar('#'));
 162       aBuffer.Append(mIdent);
 163       break;
 164     case eCSSToken_Includes:
 165       aBuffer.AppendLiteral("~=");
 166       break;
 167     case eCSSToken_Dashmatch:
 168       aBuffer.AppendLiteral("|=");
 169       break;
 170     case eCSSToken_Beginsmatch:
 171       aBuffer.AppendLiteral("^=");
 172       break;
 173     case eCSSToken_Endsmatch:
 174       aBuffer.AppendLiteral("$=");
 175       break;
 176     case eCSSToken_Containsmatch:
 177       aBuffer.AppendLiteral("*=");
 178       break;
 179     case eCSSToken_Error:
 180       aBuffer.Append(mSymbol);
 181       aBuffer.Append(mIdent);
 182       break;
 183     default:
 184       NS_ERROR("invalid token type");
 185       break;
 186   }
 187 }
 188
 189 nsCSSScanner::nsCSSScanner()
 190   : mInputStream(nsnull)
 191   , mReadPointer(nsnull)
 192 #ifdef MOZ_SVG
 193   , mSVGMode(PR_FALSE)
 194 #endif
 195 #ifdef CSS_REPORT_PARSE_ERRORS
 196   , mError(mErrorBuf, NS_ARRAY_LENGTH(mErrorBuf), 0)
 197 #endif
 198 {
 199   MOZ_COUNT_CTOR(nsCSSScanner);
 200   if (!gLexTableSetup) {
 201     // XXX need a monitor
 202     BuildLexTable();
 203   }
 204   mPushback = mLocalPushback;
 205   mPushbackSize = NS_ARRAY_LENGTH(mLocalPushback);
 206   // No need to init the other members, since they represent state
 207   // which can get cleared.  We'll init them every time Init() is
 208   // called.
 209 }
 210
 211 nsCSSScanner::~nsCSSScanner()
 212 {
 213   MOZ_COUNT_DTOR(nsCSSScanner);
 214   Close();
 215   if (mLocalPushback != mPushback) {
 216     delete [] mPushback;
 217   }
 218 }
 219
 220 #ifdef CSS_REPORT_PARSE_ERRORS
 221 #define CSS_ERRORS_PREF "layout.css.report_errors"
 222
 223 PR_STATIC_CALLBACK(int) CSSErrorsPrefChanged(const char *aPref, void *aClosure)
 224 {
 225   gReportErrors = nsContentUtils::GetBoolPref(CSS_ERRORS_PREF, PR_TRUE);
 226   return NS_OK;
 227 }
 228 #endif
 229
 230 /* static */ PRBool nsCSSScanner::InitGlobals()
 231 {
 232 #ifdef CSS_REPORT_PARSE_ERRORS
 233   if (gConsoleService && gScriptErrorFactory)
 234     return PR_TRUE;
 235
 236   nsresult rv = CallGetService(NS_CONSOLESERVICE_CONTRACTID, &gConsoleService);
 237   NS_ENSURE_SUCCESS(rv, PR_FALSE);
 238
 239   rv = CallGetClassObject(NS_SCRIPTERROR_CONTRACTID, &gScriptErrorFactory);
 240   NS_ENSURE_SUCCESS(rv, PR_FALSE);
 241   NS_ASSERTION(gConsoleService && gScriptErrorFactory,
 242                "unexpected null pointer without failure");
 243
 244   nsContentUtils::RegisterPrefCallback(CSS_ERRORS_PREF, CSSErrorsPrefChanged, nsnull);
 245   CSSErrorsPrefChanged(CSS_ERRORS_PREF, nsnull);
 246 #endif
 247   return PR_TRUE;
 248 }
 249
 250 /* static */ void nsCSSScanner::ReleaseGlobals()
 251 {
 252 #ifdef CSS_REPORT_PARSE_ERRORS
 253   nsContentUtils::UnregisterPrefCallback(CSS_ERRORS_PREF, CSSErrorsPrefChanged, nsnull);
 254   NS_IF_RELEASE(gConsoleService);
 255   NS_IF_RELEASE(gScriptErrorFactory);
 256   NS_IF_RELEASE(gStringBundle);
 257 #endif
 258 }
 259
 260 void nsCSSScanner::Init(nsIUnicharInputStream* aInput,
 261                         const PRUnichar * aBuffer, PRUint32 aCount,
 262                         nsIURI* aURI, PRUint32 aLineNumber)
 263 {
 264   NS_PRECONDITION(!mInputStream, "Should not have an existing input stream!");
 265   NS_PRECONDITION(!mReadPointer, "Should not have an existing input buffer!");
 266
 267   // Read from stream via my own buffer
 268   if (aInput) {
 269     NS_PRECONDITION(!aBuffer, "Shouldn't have both input and buffer!");
 270     NS_PRECONDITION(aCount == 0, "Shouldn't have count with a stream");
 271     mInputStream = aInput;
 272     mReadPointer = mBuffer;
 273     mCount = 0;
 274   } else {
 275     NS_PRECONDITION(aBuffer, "Either aInput or aBuffer must be set");
 276     // Read directly from the provided buffer
 277     mInputStream = nsnull;
 278     mReadPointer = aBuffer;
 279     mCount = aCount;
 280   }
 281
 282 #ifdef CSS_REPORT_PARSE_ERRORS
 283   // If aURI is the same as mURI, no need to reget mFileName -- it
 284   // shouldn't have changed.
 285   if (aURI != mURI) {
 286     mURI = aURI;
 287     if (aURI) {
 288       aURI->GetSpec(mFileName);
 289     } else {
 290       mFileName.Adopt(NS_strdup("from DOM"));
 291     }
 292   }
 293 #endif // CSS_REPORT_PARSE_ERRORS
 294   mLineNumber = aLineNumber;
 295
 296   // Reset variables that we use to keep track of our progress through the input
 297   mOffset = 0;
 298   mPushbackCount = 0;
 299
 300 #ifdef CSS_REPORT_PARSE_ERRORS
 301   mColNumber = 0;
 302 #endif
 303 }
 304
 305 #ifdef CSS_REPORT_PARSE_ERRORS
 306
 307 // @see REPORT_UNEXPECTED_EOF in nsCSSParser.cpp
 308 #define REPORT_UNEXPECTED_EOF(lf_) \
 309   ReportUnexpectedEOF(#lf_)
 310
 311 void nsCSSScanner::AddToError(const nsSubstring& aErrorText)
 312 {
 313   if (mError.IsEmpty()) {
 314     mErrorLineNumber = mLineNumber;
 315     mErrorColNumber = mColNumber;
 316     mError = aErrorText;
 317   } else {
 318     mError.Append(NS_LITERAL_STRING("  ") + aErrorText);
 319   }
 320 }
 321
 322 void nsCSSScanner::ClearError()
 323 {
 324   mError.Truncate();
 325 }
 326
 327 void nsCSSScanner::OutputError()
 328 {
 329   if (mError.IsEmpty()) return;
 330
 331   // Log it to the Error console
 332
 333   if (InitGlobals() && gReportErrors) {
 334     nsresult rv;
 335     nsCOMPtr<nsIScriptError> errorObject =
 336       do_CreateInstance(gScriptErrorFactory, &rv);
 337     if (NS_SUCCEEDED(rv)) {
 338       rv = errorObject->Init(mError.get(),
 339                              NS_ConvertUTF8toUTF16(mFileName).get(),
 340                              EmptyString().get(),
 341                              mErrorLineNumber,
 342                              mErrorColNumber,
 343                              nsIScriptError::warningFlag,
 344                              "CSS Parser");
 345       if (NS_SUCCEEDED(rv))
 346         gConsoleService->LogMessage(errorObject);
 347     }
 348   }
 349   ClearError();
 350 }
 351
 352 static PRBool InitStringBundle()
 353 {
 354   if (gStringBundle)
 355     return PR_TRUE;
 356
 357   nsCOMPtr<nsIStringBundleService> sbs =
 358     do_GetService(NS_STRINGBUNDLE_CONTRACTID);
 359   if (!sbs)
 360     return PR_FALSE;
 361
 362   nsresult rv =
 363     sbs->CreateBundle("chrome://global/locale/css.properties", &gStringBundle);
 364   if (NS_FAILED(rv)) {
 365     gStringBundle = nsnull;
 366     return PR_FALSE;
 367   }
 368
 369   return PR_TRUE;
 370 }
 371
 372 #define ENSURE_STRINGBUNDLE \
 373   PR_BEGIN_MACRO if (!InitStringBundle()) return; PR_END_MACRO
 374
 375 // aMessage must take no parameters
 376 void nsCSSScanner::ReportUnexpected(const char* aMessage)
 377 {
 378   ENSURE_STRINGBUNDLE;
 379
 380   nsXPIDLString str;
 381   gStringBundle->GetStringFromName(NS_ConvertASCIItoUTF16(aMessage).get(),
 382                                    getter_Copies(str));
 383   AddToError(str);
 384 }
 385
 386 void nsCSSScanner::ReportUnexpectedParams(const char* aMessage,
 387                                           const PRUnichar **aParams,
 388                                           PRUint32 aParamsLength)
 389 {
 390   NS_PRECONDITION(aParamsLength > 0, "use the non-params version");
 391   ENSURE_STRINGBUNDLE;
 392
 393   nsXPIDLString str;
 394   gStringBundle->FormatStringFromName(NS_ConvertASCIItoUTF16(aMessage).get(),
 395                                       aParams, aParamsLength,
 396                                       getter_Copies(str));
 397   AddToError(str);
 398 }
 399
 400 // aLookingFor is a plain string, not a format string
 401 void nsCSSScanner::ReportUnexpectedEOF(const char* aLookingFor)
 402 {
 403   ENSURE_STRINGBUNDLE;
 404
 405   nsXPIDLString innerStr;
 406   gStringBundle->GetStringFromName(NS_ConvertASCIItoUTF16(aLookingFor).get(),
 407                                    getter_Copies(innerStr));
 408
 409   const PRUnichar *params[] = {
 410     innerStr.get()
 411   };
 412   nsXPIDLString str;
 413   gStringBundle->FormatStringFromName(NS_LITERAL_STRING("PEUnexpEOF2").get(),
 414                                       params, NS_ARRAY_LENGTH(params),
 415                                       getter_Copies(str));
 416   AddToError(str);
 417 }
 418
 419 // aLookingFor is a single character
 420 void nsCSSScanner::ReportUnexpectedEOF(PRUnichar aLookingFor)
 421 {
 422   ENSURE_STRINGBUNDLE;
 423
 424   const PRUnichar lookingForStr[] = {
 425     PRUnichar('\''), aLookingFor, PRUnichar('\''), PRUnichar(0)
 426   };
 427   const PRUnichar *params[] = { lookingForStr };
 428   nsXPIDLString str;
 429   gStringBundle->FormatStringFromName(NS_LITERAL_STRING("PEUnexpEOF2").get(),
 430                                       params, NS_ARRAY_LENGTH(params),
 431                                       getter_Copies(str));
 432   AddToError(str);
 433 }
 434
 435 // aMessage must take 1 parameter (for the string representation of the
 436 // unexpected token)
 437 void nsCSSScanner::ReportUnexpectedToken(nsCSSToken& tok,
 438                                          const char *aMessage)
 439 {
 440   ENSURE_STRINGBUNDLE;
 441
 442   nsAutoString tokenString;
 443   tok.AppendToString(tokenString);
 444
 445   const PRUnichar *params[] = {
 446     tokenString.get()
 447   };
 448
 449   ReportUnexpectedParams(aMessage, params, NS_ARRAY_LENGTH(params));
 450 }
 451
 452 // aParams's first entry must be null, and we'll fill in the token
 453 void nsCSSScanner::ReportUnexpectedTokenParams(nsCSSToken& tok,
 454                                                const char* aMessage,
 455                                                const PRUnichar **aParams,
 456                                                PRUint32 aParamsLength)
 457 {
 458   NS_PRECONDITION(aParamsLength > 1, "use the non-params version");
 459   NS_PRECONDITION(aParams[0] == nsnull, "first param should be empty");
 460
 461   ENSURE_STRINGBUNDLE;
 462
 463   nsAutoString tokenString;
 464   tok.AppendToString(tokenString);
 465   aParams[0] = tokenString.get();
 466
 467   ReportUnexpectedParams(aMessage, aParams, aParamsLength);
 468 }
 469
 470 #else
 471
 472 #define REPORT_UNEXPECTED_EOF(lf_)
 473
 474 #endif // CSS_REPORT_PARSE_ERRORS
 475
 476 void nsCSSScanner::Close()
 477 {
 478   mInputStream = nsnull;
 479   mReadPointer = nsnull;
 480
 481   // Clean things up so we don't hold on to memory if our parser gets recycled.
 482 #ifdef CSS_REPORT_PARSE_ERRORS
 483   mFileName.Truncate();
 484   mURI = nsnull;
 485   mError.Truncate();
 486 #endif
 487   if (mPushback != mLocalPushback) {
 488     delete [] mPushback;
 489     mPushback = mLocalPushback;
 490     mPushbackSize = NS_ARRAY_LENGTH(mLocalPushback);
 491   }
 492 }
 493
 494 #ifdef CSS_REPORT_PARSE_ERRORS
 495 #define TAB_STOP_WIDTH 8
 496 #endif
 497
 498 PRBool nsCSSScanner::EnsureData(nsresult& aErrorCode)
 499 {
 500   if (mOffset < mCount)
 501     return PR_TRUE;
 502
 503   if (mInputStream) {
 504     mOffset = 0;
 505     aErrorCode = mInputStream->Read(mBuffer, CSS_BUFFER_SIZE, &mCount);
 506     if (NS_FAILED(aErrorCode) || mCount == 0) {
 507       mCount = 0;
 508       return PR_FALSE;
 509     }
 510     return PR_TRUE;
 511   }
 512
 513   return PR_FALSE;
 514 }
 515
 516 // Returns -1 on error or eof
 517 PRInt32 nsCSSScanner::Read(nsresult& aErrorCode)
 518 {
 519   PRInt32 rv;
 520   if (0 < mPushbackCount) {
 521     rv = PRInt32(mPushback[--mPushbackCount]);
 522   } else {
 523     if (mOffset == mCount && !EnsureData(aErrorCode)) {
 524       return -1;
 525     }
 526     rv = PRInt32(mReadPointer[mOffset++]);
 527     // There are four types of newlines in CSS: "\r", "\n", "\r\n", and "\f".
 528     // To simplify dealing with newlines, they are all normalized to "\n" here
 529     if (rv == '\r') {
 530       if (EnsureData(aErrorCode) && mReadPointer[mOffset] == '\n') {
 531         mOffset++;
 532       }
 533       rv = '\n';
 534     } else if (rv == '\f') {
 535       rv = '\n';
 536     }
 537     if (rv == '\n') {
 538       // 0 is a magical line number meaning that we don't know (i.e., script)
 539       if (mLineNumber != 0)
 540         ++mLineNumber;
 541 #ifdef CSS_REPORT_PARSE_ERRORS
 542       mColNumber = 0;
 543 #endif
 544     }
 545 #ifdef CSS_REPORT_PARSE_ERRORS
 546     else if (rv == '\t') {
 547       mColNumber = ((mColNumber - 1 + TAB_STOP_WIDTH) / TAB_STOP_WIDTH)
 548                    * TAB_STOP_WIDTH;
 549     } else if (rv != '\n') {
 550       mColNumber++;
 551     }
 552 #endif
 553   }
 554 //printf("Read => %x\n", rv);
 555   return rv;
 556 }
 557
 558 PRInt32 nsCSSScanner::Peek(nsresult& aErrorCode)
 559 {
 560   if (0 == mPushbackCount) {
 561     PRInt32 ch = Read(aErrorCode);
 562     if (ch < 0) {
 563       return -1;
 564     }
 565     mPushback[0] = PRUnichar(ch);
 566     mPushbackCount++;
 567   }
 568 //printf("Peek => %x\n", mLookAhead);
 569   return PRInt32(mPushback[mPushbackCount - 1]);
 570 }
 571
 572 void nsCSSScanner::Pushback(PRUnichar aChar)
 573 {
 574   if (mPushbackCount == mPushbackSize) { // grow buffer
 575     PRUnichar*  newPushback = new PRUnichar[mPushbackSize + 4];
 576     if (nsnull == newPushback) {
 577       return;
 578     }
 579     mPushbackSize += 4;
 580     memcpy(newPushback, mPushback, sizeof(PRUnichar) * mPushbackCount);
 581     if (mPushback != mLocalPushback) {
 582       delete [] mPushback;
 583     }
 584     mPushback = newPushback;
 585   }
 586   mPushback[mPushbackCount++] = aChar;
 587 }
 588
 589 PRBool nsCSSScanner::LookAhead(nsresult& aErrorCode, PRUnichar aChar)
 590 {
 591   PRInt32 ch = Read(aErrorCode);
 592   if (ch < 0) {
 593     return PR_FALSE;
 594   }
 595   if (ch == aChar) {
 596     return PR_TRUE;
 597   }
 598   Pushback(ch);
 599   return PR_FALSE;
 600 }
 601
 602 PRBool nsCSSScanner::EatWhiteSpace(nsresult& aErrorCode)
 603 {
 604   PRBool eaten = PR_FALSE;
 605   for (;;) {
 606     PRInt32 ch = Read(aErrorCode);
 607     if (ch < 0) {
 608       break;
 609     }
 610     if ((ch == ' ') || (ch == '\n') || (ch == '\t')) {
 611       eaten = PR_TRUE;
 612       continue;
 613     }
 614     Pushback(ch);
 615     break;
 616   }
 617   return eaten;
 618 }
 619
 620 PRBool nsCSSScanner::EatNewline(nsresult& aErrorCode)
 621 {
 622   PRInt32 ch = Read(aErrorCode);
 623   if (ch < 0) {
 624     return PR_FALSE;
 625   }
 626   PRBool eaten = PR_FALSE;
 627   if (ch == '\n') {
 628     eaten = PR_TRUE;
 629   } else {
 630     Pushback(ch);
 631   }
 632   return eaten;
 633 }
 634
 635 PRBool nsCSSScanner::Next(nsresult& aErrorCode, nsCSSToken& aToken)
 636 {
 637   PRInt32 ch = Read(aErrorCode);
 638   if (ch < 0) {
 639     return PR_FALSE;
 640   }
 641
 642   // IDENT
 643   if (StartsIdent(ch, Peek(aErrorCode)))
 644     return ParseIdent(aErrorCode, ch, aToken);
 645
 646   // AT_KEYWORD
 647   if (ch == '@') {
 648     PRInt32 nextChar = Read(aErrorCode);
 649     if (nextChar >= 0) {
 650       PRInt32 followingChar = Peek(aErrorCode);
 651       Pushback(nextChar);
 652       if (StartsIdent(nextChar, followingChar))
 653         return ParseAtKeyword(aErrorCode, ch, aToken);
 654     }
 655   }
 656
 657   // NUMBER or DIM
 658   if ((ch == '.') || (ch == '+') || (ch == '-')) {
 659     PRInt32 nextChar = Peek(aErrorCode);
 660     if (IsDigit(nextChar)) {
 661       return ParseNumber(aErrorCode, ch, aToken);
 662     }
 663     else if (('.' == nextChar) && ('.' != ch)) {
 664       nextChar = Read(aErrorCode);
 665       PRInt32 followingChar = Peek(aErrorCode);
 666       Pushback(nextChar);
 667       if (IsDigit(followingChar))
 668         return ParseNumber(aErrorCode, ch, aToken);
 669     }
 670   }
 671   if (IsDigit(ch)) {
 672     return ParseNumber(aErrorCode, ch, aToken);
 673   }
 674
 675   // ID
 676   if (ch == '#') {
 677     return ParseRef(aErrorCode, ch, aToken);
 678   }
 679
 680   // STRING
 681   if ((ch == '"') || (ch == '\'')) {
 682     return ParseString(aErrorCode, ch, aToken);
 683   }
 684
 685   // WS
 686   if (IsWhitespace(ch)) {
 687     aToken.mType = eCSSToken_WhiteSpace;
 688     aToken.mIdent.Assign(PRUnichar(ch));
 689     (void) EatWhiteSpace(aErrorCode);
 690     return PR_TRUE;
 691   }
 692   if (ch == '/') {
 693     PRInt32 nextChar = Peek(aErrorCode);
 694     if (nextChar == '*') {
 695       (void) Read(aErrorCode);
 696 #if 0
 697       // If we change our storage data structures such that comments are
 698       // stored (for Editor), we should reenable this code, condition it
 699       // on being in editor mode, and apply glazou's patch from bug
 700       // 60290.
 701       aToken.mIdent.SetCapacity(2);
 702       aToken.mIdent.Assign(PRUnichar(ch));
 703       aToken.mIdent.Append(PRUnichar(nextChar));
 704       return ParseCComment(aErrorCode, aToken);
 705 #endif
 706       return SkipCComment(aErrorCode) && Next(aErrorCode, aToken);
 707     }
 708   }
 709   if (ch == '<') {  // consume HTML comment tags
 710     if (LookAhead(aErrorCode, '!')) {
 711       if (LookAhead(aErrorCode, '-')) {
 712         if (LookAhead(aErrorCode, '-')) {
 713           aToken.mType = eCSSToken_HTMLComment;
 714           aToken.mIdent.AssignLiteral("<!--");
 715           return PR_TRUE;
 716         }
 717         Pushback('-');
 718       }
 719       Pushback('!');
 720     }
 721   }
 722   if (ch == '-') {  // check for HTML comment end
 723     if (LookAhead(aErrorCode, '-')) {
 724       if (LookAhead(aErrorCode, '>')) {
 725         aToken.mType = eCSSToken_HTMLComment;
 726         aToken.mIdent.AssignLiteral("-->");
 727         return PR_TRUE;
 728       }
 729       Pushback('-');
 730     }
 731   }
 732
 733   // INCLUDES ("~=") and DASHMATCH ("|=")
 734   if (( ch == '|' ) || ( ch == '~' ) || ( ch == '^' ) ||
 735       ( ch == '$' ) || ( ch == '*' )) {
 736     PRInt32 nextChar = Read(aErrorCode);
 737     if ( nextChar == '=' ) {
 738       if (ch == '~') {
 739         aToken.mType = eCSSToken_Includes;
 740       }
 741       else if (ch == '|') {
 742         aToken.mType = eCSSToken_Dashmatch;
 743       }
 744       else if (ch == '^') {
 745         aToken.mType = eCSSToken_Beginsmatch;
 746       }
 747       else if (ch == '$') {
 748         aToken.mType = eCSSToken_Endsmatch;
 749       }
 750       else if (ch == '*') {
 751         aToken.mType = eCSSToken_Containsmatch;
 752       }
 753       return PR_TRUE;
 754     } else if (nextChar >= 0) {
 755       Pushback(nextChar);
 756     }
 757   }
 758   aToken.mType = eCSSToken_Symbol;
 759   aToken.mSymbol = ch;
 760   return PR_TRUE;
 761 }
 762
 763 PRBool nsCSSScanner::NextURL(nsresult& aErrorCode, nsCSSToken& aToken)
 764 {
 765   PRInt32 ch = Read(aErrorCode);
 766   if (ch < 0) {
 767     return PR_FALSE;
 768   }
 769
 770   // STRING
 771   if ((ch == '"') || (ch == '\'')) {
 772     return ParseString(aErrorCode, ch, aToken);
 773   }
 774
 775   // WS
 776   if (IsWhitespace(ch)) {
 777     aToken.mType = eCSSToken_WhiteSpace;
 778     aToken.mIdent.Assign(PRUnichar(ch));
 779     (void) EatWhiteSpace(aErrorCode);
 780     return PR_TRUE;
 781   }
 782   if (ch == '/') {
 783     PRInt32 nextChar = Peek(aErrorCode);
 784     if (nextChar == '*') {
 785       (void) Read(aErrorCode);
 786 #if 0
 787       // If we change our storage data structures such that comments are
 788       // stored (for Editor), we should reenable this code, condition it
 789       // on being in editor mode, and apply glazou's patch from bug
 790       // 60290.
 791       aToken.mIdent.SetCapacity(2);
 792       aToken.mIdent.Assign(PRUnichar(ch));
 793       aToken.mIdent.Append(PRUnichar(nextChar));
 794       return ParseCComment(aErrorCode, aToken);
 795 #endif
 796       return SkipCComment(aErrorCode) && Next(aErrorCode, aToken);
 797     }
 798   }
 799
 800   // Process a url lexical token. A CSS1 url token can contain
 801   // characters beyond identifier characters (e.g. '/', ':', etc.)
 802   // Because of this the normal rules for tokenizing the input don't
 803   // apply very well. To simplify the parser and relax some of the
 804   // requirements on the scanner we parse url's here. If we find a
 805   // malformed URL then we emit a token of type "InvalidURL" so that
 806   // the CSS1 parser can ignore the invalid input. We attempt to eat
 807   // the right amount of input data when an invalid URL is presented.
 808
 809   aToken.mType = eCSSToken_InvalidURL;
 810   nsString& ident = aToken.mIdent;
 811   ident.SetLength(0);
 812
 813   if (ch == ')') {
 814     Pushback(ch);
 815     // empty url spec; just get out of here
 816     aToken.mType = eCSSToken_URL;
 817   } else {
 818     // start of a non-quoted url
 819     Pushback(ch);
 820     PRBool ok = PR_TRUE;
 821     for (;;) {
 822       ch = Read(aErrorCode);
 823       if (ch < 0) break;
 824       if (ch == CSS_ESCAPE) {
 825         ParseAndAppendEscape(aErrorCode, ident);
 826       } else if ((ch == '"') || (ch == '\'') || (ch == '(')) {
 827         // This is an invalid URL spec
 828         ok = PR_FALSE;
 829       } else if (IsWhitespace(ch)) {
 830         // Whitespace is allowed at the end of the URL
 831         (void) EatWhiteSpace(aErrorCode);
 832         if (LookAhead(aErrorCode, ')')) {
 833           Pushback(')');  // leave the closing symbol
 834           // done!
 835           break;
 836         }
 837         // Whitespace is followed by something other than a
 838         // ")". This is an invalid url spec.
 839         ok = PR_FALSE;
 840       } else if (ch == ')') {
 841         Pushback(ch);
 842         // All done
 843         break;
 844       } else {
 845         // A regular url character.
 846         ident.Append(PRUnichar(ch));
 847       }
 848     }
 849
 850     // If the result of the above scanning is ok then change the token
 851     // type to a useful one.
 852     if (ok) {
 853       aToken.mType = eCSSToken_URL;
 854     }
 855   }
 856   return PR_TRUE;
 857 }
 858
 859
 860 void
 861 nsCSSScanner::ParseAndAppendEscape(nsresult& aErrorCode, nsString& aOutput)
 862 {
 863   PRInt32 ch = Peek(aErrorCode);
 864   if (ch < 0) {
 865     aOutput.Append(CSS_ESCAPE);
 866     return;
 867   }
 868   if (IsHexDigit(ch)) {
 869     PRInt32 rv = 0;
 870     int i;
 871     for (i = 0; i < 6; i++) { // up to six digits
 872       ch = Read(aErrorCode);
 873       if (ch < 0) {
 874         // Whoops: error or premature eof
 875         break;
 876       }
 877       if (!IsHexDigit(ch) && !IsWhitespace(ch)) {
 878         Pushback(ch);
 879         break;
 880       } else if (IsHexDigit(ch)) {
 881         if (IsDigit(ch)) {
 882           rv = rv * 16 + (ch - '0');
 883         } else {
 884           // Note: c&7 just keeps the low three bits which causes
 885           // upper and lower case alphabetics to both yield their
 886           // "relative to 10" value for computing the hex value.
 887           rv = rv * 16 + ((ch & 0x7) + 9);
 888         }
 889       } else {
 890         NS_ASSERTION(IsWhitespace(ch), "bad control flow");
 891         // single space ends escape
 892         break;
 893       }
 894     }
 895     if (6 == i) { // look for trailing whitespace and eat it
 896       ch = Peek(aErrorCode);
 897       if (IsWhitespace(ch)) {
 898         ch = Read(aErrorCode);
 899       }
 900     }
 901     NS_ASSERTION(rv >= 0, "How did rv become negative?");
 902     if (rv > 0) {
 903       AppendUCS4ToUTF16(ENSURE_VALID_CHAR(rv), aOutput);
 904     }
 905     return;
 906   } else {
 907     // "Any character except a hexidecimal digit can be escaped to
 908     // remove its special meaning by putting a backslash in front"
 909     // -- CSS1 spec section 7.1
 910     if (!EatNewline(aErrorCode)) { // skip escaped newline
 911       (void) Read(aErrorCode);
 912       if (ch > 0) {
 913         aOutput.Append(ch);
 914       }
 915     }
 916     return;
 917   }
 918 }
 919
 920 /**
 921  * Gather up the characters in an identifier. The identfier was
 922  * started by "aChar" which will be appended to aIdent. The result
 923  * will be aIdent with all of the identifier characters appended
 924  * until the first non-identifier character is seen. The termination
 925  * character is unread for the future re-reading.
 926  */
 927 PRBool nsCSSScanner::GatherIdent(nsresult& aErrorCode, PRInt32 aChar,
 928                                  nsString& aIdent)
 929 {
 930   if (aChar == CSS_ESCAPE) {
 931     ParseAndAppendEscape(aErrorCode, aIdent);
 932   }
 933   else if (0 < aChar) {
 934     aIdent.Append(aChar);
 935   }
 936   for (;;) {
 937     // If nothing in pushback, first try to get as much as possible in one go
 938     if (!mPushbackCount && EnsureData(aErrorCode)) {
 939       // See how much we can consume and append in one go
 940       PRUint32 n = mOffset;
 941       // Count number of Ident characters that can be processed
 942       while (n < mCount && IsIdent(mReadPointer[n])) {
 943         ++n;
 944       }
 945       // Add to the token what we have so far
 946       if (n > mOffset) {
 947 #ifdef CSS_REPORT_PARSE_ERRORS
 948         mColNumber += n - mOffset;
 949 #endif
 950         aIdent.Append(&mReadPointer[mOffset], n - mOffset);
 951         mOffset = n;
 952       }
 953     }
 954
 955     aChar = Read(aErrorCode);
 956     if (aChar < 0) break;
 957     if (aChar == CSS_ESCAPE) {
 958       ParseAndAppendEscape(aErrorCode, aIdent);
 959     } else if (IsIdent(aChar)) {
 960       aIdent.Append(PRUnichar(aChar));
 961     } else {
 962       Pushback(aChar);
 963       break;
 964     }
 965   }
 966   return PR_TRUE;
 967 }
 968
 969 PRBool nsCSSScanner::ParseRef(nsresult& aErrorCode,
 970                               PRInt32 aChar,
 971                               nsCSSToken& aToken)
 972 {
 973   aToken.mIdent.SetLength(0);
 974   aToken.mType = eCSSToken_Ref;
 975   PRInt32 ch = Read(aErrorCode);
 976   if (ch < 0) {
 977     return PR_FALSE;
 978   }
 979   if (IsIdent(ch) || ch == CSS_ESCAPE) {
 980     // First char after the '#' is a valid ident char (or an escape),
 981     // so it makes sense to keep going
 982     if (StartsIdent(ch, Peek(aErrorCode))) {
 983       aToken.mType = eCSSToken_ID;
 984     }
 985     return GatherIdent(aErrorCode, ch, aToken.mIdent);
 986   }
 987
 988   // No ident chars after the '#'.  Just unread |ch| and get out of here.
 989   Pushback(ch);
 990   return PR_TRUE;
 991 }
 992
 993 PRBool nsCSSScanner::ParseIdent(nsresult& aErrorCode,
 994                                 PRInt32 aChar,
 995                                 nsCSSToken& aToken)
 996 {
 997   nsString& ident = aToken.mIdent;
 998   ident.SetLength(0);
 999   if (!GatherIdent(aErrorCode, aChar, ident)) {
1000     return PR_FALSE;
1001   }
1002
1003   nsCSSTokenType tokenType = eCSSToken_Ident;
1004   // look for functions (ie: "ident(")
1005   if (PRUnichar('(') == PRUnichar(Peek(aErrorCode))) { // this is a function definition
1006     tokenType = eCSSToken_Function;
1007   }
1008
1009   aToken.mType = tokenType;
1010   return PR_TRUE;
1011 }
1012
1013 PRBool nsCSSScanner::ParseAtKeyword(nsresult& aErrorCode, PRInt32 aChar,
1014                                     nsCSSToken& aToken)
1015 {
1016   aToken.mIdent.SetLength(0);
1017   aToken.mType = eCSSToken_AtKeyword;
1018   return GatherIdent(aErrorCode, 0, aToken.mIdent);
1019 }
1020
1021 PRBool nsCSSScanner::ParseNumber(nsresult& aErrorCode, PRInt32 c,
1022                                  nsCSSToken& aToken)
1023 {
1024   nsString& ident = aToken.mIdent;
1025   ident.SetLength(0);
1026   PRBool gotDot = (c == '.');
1027   aToken.mHasSign = (c == '+' || c == '-');
1028   if (c != '+') {
1029     ident.Append(PRUnichar(c));
1030   }
1031
1032   // Gather up characters that make up the number
1033   PRBool gotE = PR_FALSE;
1034   for (;;) {
1035     c = Read(aErrorCode);
1036     if (c < 0) break;
1037     if (!gotDot  && !gotE && (c == '.') &&
1038         IsDigit(Peek(aErrorCode))) {
1039       gotDot = PR_TRUE;
1040 #ifdef MOZ_SVG
1041     } else if (!gotE && (c == 'e' || c == 'E')) {
1042       if (!IsSVGMode()) {
1043         break;
1044       }
1045       PRInt32 nextChar = Peek(aErrorCode);
1046       PRInt32 sign = 0;
1047       if (nextChar == '-' || nextChar == '+') {
1048         sign = Read(aErrorCode);
1049         nextChar = Peek(aErrorCode);
1050       }
1051       if (IsDigit(nextChar)) {
1052         gotE = PR_TRUE;
1053         if (sign) {
1054           ident.Append(PRUnichar(c));
1055           c = sign;
1056         }
1057       } else {
1058         if (sign) {
1059           Pushback(sign);
1060         }
1061         break;
1062       }
1063 #endif
1064     } else if (!IsDigit(c)) {
1065       break;
1066     }
1067     ident.Append(PRUnichar(c));
1068   }
1069
1070   // Convert number to floating point
1071   nsCSSTokenType type = eCSSToken_Number;
1072   PRInt32 ec;
1073   float value = ident.ToFloat(&ec);
1074
1075   // Set mIntegerValid for all cases (except %, below) because we need
1076   // it for the "2n" in :nth-child(2n).
1077   aToken.mIntegerValid = PR_FALSE;
1078   if (!gotDot && !gotE) {
1079     aToken.mInteger = ident.ToInteger(&ec);
1080     aToken.mIntegerValid = PR_TRUE;
1081   }
1082   ident.SetLength(0);
1083
1084   // Look at character that terminated the number
1085   if (c >= 0) {
1086     if (StartsIdent(c, Peek(aErrorCode))) {
1087       if (!GatherIdent(aErrorCode, c, ident)) {
1088         return PR_FALSE;
1089       }
1090       type = eCSSToken_Dimension;
1091     } else if ('%' == c) {
1092       type = eCSSToken_Percentage;
1093       value = value / 100.0f;
1094       aToken.mIntegerValid = PR_FALSE;
1095     } else {
1096       // Put back character that stopped numeric scan
1097       Pushback(c);
1098     }
1099   }
1100   aToken.mNumber = value;
1101   aToken.mType = type;
1102   return PR_TRUE;
1103 }
1104
1105 PRBool nsCSSScanner::SkipCComment(nsresult& aErrorCode)
1106 {
1107   for (;;) {
1108     PRInt32 ch = Read(aErrorCode);
1109     if (ch < 0) break;
1110     if (ch == '*') {
1111       if (LookAhead(aErrorCode, '/')) {
1112         return PR_TRUE;
1113       }
1114     }
1115   }
1116
1117   REPORT_UNEXPECTED_EOF(PECommentEOF);
1118   return PR_FALSE;
1119 }
1120
#if 0
// Compiled out by the surrounding "#if 0".
//
// Reads up to the star-slash pair that closes a comment and marks the
// token as eCSSToken_WhiteSpace. The closing pair is always appended to
// aToken.mIdent; every other character is appended only when
// COLLECT_WHITESPACE is defined. mIdent is not cleared first. Always
// returns PR_TRUE, including when the input ends before the comment is
// closed.
PRBool nsCSSScanner::ParseCComment(nsresult& aErrorCode, nsCSSToken& aToken)
{
  nsString& text = aToken.mIdent;
  PRInt32 ch;
  while ((ch = Read(aErrorCode)) >= 0) {
    if (ch == '*' && LookAhead(aErrorCode, '/')) {
      text.Append(PRUnichar(ch));
      text.Append(PRUnichar('/'));
      break;
    }
#ifdef COLLECT_WHITESPACE
    text.Append(PRUnichar(ch));
#endif
  }
  aToken.mType = eCSSToken_WhiteSpace;
  return PR_TRUE;
}
#endif
1143
#if 0
// Compiled out by the surrounding "#if 0".
//
// Reads characters until EatNewline() succeeds or Read() returns a
// negative value, and marks the token as eCSSToken_WhiteSpace.
// aToken.mIdent is cleared first; the characters read are appended to
// it only when COLLECT_WHITESPACE is defined. Always returns PR_TRUE.
PRBool nsCSSScanner::ParseEOLComment(nsresult& aErrorCode, nsCSSToken& aToken)
{
  nsString& text = aToken.mIdent;
  text.SetLength(0);
  while (!EatNewline(aErrorCode)) {
    PRInt32 ch = Read(aErrorCode);
    if (ch < 0) {
      break;
    }
#ifdef COLLECT_WHITESPACE
    text.Append(PRUnichar(ch));
#endif
  }
  aToken.mType = eCSSToken_WhiteSpace;
  return PR_TRUE;
}
#endif // 0
1165
1166 PRBool nsCSSScanner::ParseString(nsresult& aErrorCode, PRInt32 aStop,
1167                                  nsCSSToken& aToken)
1168 {
1169   aToken.mIdent.SetLength(0);
1170   aToken.mType = eCSSToken_String;
1171   aToken.mSymbol = PRUnichar(aStop); // remember how it's quoted
1172   for (;;) {
1173     // If nothing in pushback, first try to get as much as possible in one go
1174     if (!mPushbackCount && EnsureData(aErrorCode)) {
1175       // See how much we can consume and append in one go
1176       PRUint32 n = mOffset;
1177       // Count number of characters that can be processed
1178       for (;n < mCount; ++n) {
1179         PRUnichar nextChar = mReadPointer[n];
1180         if ((nextChar == aStop) || (nextChar == CSS_ESCAPE) ||
1181             (nextChar == '\n') || (nextChar == '\r') || (nextChar == '\f')) {
1182           break;
1183         }
1184 #ifdef CSS_REPORT_PARSE_ERRORS
1185         if (nextChar == '\t') {
1186           mColNumber = ((mColNumber - 1 + TAB_STOP_WIDTH) / TAB_STOP_WIDTH)
1187                        * TAB_STOP_WIDTH;
1188         } else {
1189           ++mColNumber;
1190         }
1191 #endif
1192       }
1193       // Add to the token what we have so far
1194       if (n > mOffset) {
1195         aToken.mIdent.Append(&mReadPointer[mOffset], n - mOffset);
1196         mOffset = n;
1197       }
1198     }
1199     PRInt32 ch = Read(aErrorCode);
1200     if (ch < 0 || ch == aStop) {
1201       break;
1202     }
1203     if (ch == '\n') {
1204       aToken.mType = eCSSToken_Error;
1205 #ifdef CSS_REPORT_PARSE_ERRORS
1206       ReportUnexpectedToken(aToken, "SEUnterminatedString");
1207 #endif
1208       break;
1209     }
1210     if (ch == CSS_ESCAPE) {
1211       ParseAndAppendEscape(aErrorCode, aToken.mIdent);
1212     } else {
1213       aToken.mIdent.Append(ch);
1214     }
1215   }
1216   return PR_TRUE;
1217 }