layout/style/nsCSSScanner.cpp

   1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* ***** BEGIN LICENSE BLOCK *****
   3  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
   4  *
   5  * The contents of this file are subject to the Mozilla Public License Version
   6  * 1.1 (the "License"); you may not use this file except in compliance with
   7  * the License. You may obtain a copy of the License at
   8  * http://www.mozilla.org/MPL/
   9  *
  10  * Software distributed under the License is distributed on an "AS IS" basis,
  11  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  12  * for the specific language governing rights and limitations under the
  13  * License.
  14  *
  15  * The Original Code is mozilla.org code.
  16  *
  17  * The Initial Developer of the Original Code is
  18  * Netscape Communications Corporation.
  19  * Portions created by the Initial Developer are Copyright (C) 1998
  20  * the Initial Developer. All Rights Reserved.
  21  *
  22  * Contributor(s):
  23  *   L. David Baron <dbaron@dbaron.org>
  24  *   Daniel Glazman <glazman@netscape.com>
  25  *
  26  * Alternatively, the contents of this file may be used under the terms of
  27  * either of the GNU General Public License Version 2 or later (the "GPL"),
  28  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  29  * in which case the provisions of the GPL or the LGPL are applicable instead
  30  * of those above. If you wish to allow use of your version of this file only
  31  * under the terms of either the GPL or the LGPL, and not to allow others to
  32  * use your version of this file under the terms of the MPL, indicate your
  33  * decision by deleting the provisions above and replace them with the notice
  34  * and other provisions required by the GPL or the LGPL. If you do not delete
  35  * the provisions above, a recipient may use your version of this file under
  36  * the terms of any one of the MPL, the GPL or the LGPL.
  37  *
  38  * ***** END LICENSE BLOCK ***** */
  39
  40 /* tokenization of CSS style sheets */
  41
  42 #include "nsCSSScanner.h"
  43 #include "nsIFactory.h"
  44 #include "nsIInputStream.h"
  45 #include "nsIUnicharInputStream.h"
  46 #include "nsString.h"
  47 #include "nsCRT.h"
  48
  49 // for #ifdef CSS_REPORT_PARSE_ERRORS
  50 #include "nsCOMPtr.h"
  51 #include "nsIServiceManager.h"
  52 #include "nsIComponentManager.h"
  53 #include "nsReadableUtils.h"
  54 #include "nsIURI.h"
  55 #include "nsIConsoleService.h"
  56 #include "nsIScriptError.h"
  57 #include "nsIStringBundle.h"
  58 #include "nsContentUtils.h"
  59
  60 // Don't bother collecting whitespace characters in token's mIdent buffer
  61 #undef COLLECT_WHITESPACE
  62
  63 static const PRUnichar CSS_ESCAPE = PRUnichar('\\');
  64 static const PRUint8 IS_DIGIT = 0x01;
  65 static const PRUint8 IS_HEX_DIGIT = 0x02;
  66 static const PRUint8 START_IDENT = 0x04;
  67 static const PRUint8 IS_IDENT = 0x08;
  68 static const PRUint8 IS_WHITESPACE = 0x10;
  69
  70 static PRBool gLexTableSetup = PR_FALSE;
  71 static PRUint8 gLexTable[256];
  72
  73 #ifdef CSS_REPORT_PARSE_ERRORS
  74 static PRBool gReportErrors = PR_TRUE;
  75 static nsIConsoleService *gConsoleService;
  76 static nsIFactory *gScriptErrorFactory;
  77 static nsIStringBundle *gStringBundle;
  78 #endif
  79
  80 static void
  81 BuildLexTable()
  82 {
  83   gLexTableSetup = PR_TRUE;
  84
  85   PRUint8* lt = gLexTable;
  86   int i;
  87   lt[CSS_ESCAPE] = START_IDENT;
  88   lt['-'] |= IS_IDENT;
  89   lt['_'] |= IS_IDENT | START_IDENT;
  90   lt[' '] |= IS_WHITESPACE;   // space
  91   lt['\t'] |= IS_WHITESPACE;  // horizontal tab
  92   lt['\r'] |= IS_WHITESPACE;  // carriage return
  93   lt['\n'] |= IS_WHITESPACE;  // line feed
  94   lt['\f'] |= IS_WHITESPACE;  // form feed
  95   for (i = 161; i <= 255; i++) {
  96     lt[i] |= IS_IDENT | START_IDENT;
  97   }
  98   for (i = '0'; i <= '9'; i++) {
  99     lt[i] |= IS_DIGIT | IS_HEX_DIGIT | IS_IDENT;
 100   }
 101   for (i = 'A'; i <= 'Z'; i++) {
 102     if ((i >= 'A') && (i <= 'F')) {
 103       lt[i] |= IS_HEX_DIGIT;
 104       lt[i+32] |= IS_HEX_DIGIT;
 105     }
 106     lt[i] |= IS_IDENT | START_IDENT;
 107     lt[i+32] |= IS_IDENT | START_IDENT;
 108   }
 109 }
 110
 111 static inline PRBool
 112 IsIdentStart(PRInt32 aChar)
 113 {
 114   return aChar >= 0 &&
 115     (aChar >= 256 || (gLexTable[aChar] & START_IDENT) != 0);
 116 }
 117
 118 static inline PRBool
 119 StartsIdent(PRInt32 aFirstChar, PRInt32 aSecondChar)
 120 {
 121   return IsIdentStart(aFirstChar) ||
 122     (aFirstChar == '-' && IsIdentStart(aSecondChar));
 123 }
 124
 125 static inline PRBool
 126 IsWhitespace(PRInt32 ch) {
 127   return PRUint32(ch) < 256 && (gLexTable[ch] & IS_WHITESPACE) != 0;
 128 }
 129
 130 static inline PRBool
 131 IsDigit(PRInt32 ch) {
 132   return PRUint32(ch) < 256 && (gLexTable[ch] & IS_DIGIT) != 0;
 133 }
 134
 135 static inline PRBool
 136 IsHexDigit(PRInt32 ch) {
 137   return PRUint32(ch) < 256 && (gLexTable[ch] & IS_HEX_DIGIT) != 0;
 138 }
 139
 140 static inline PRBool
 141 IsIdent(PRInt32 ch) {
 142   return ch >= 0 && (ch >= 256 || (gLexTable[ch] & IS_IDENT) != 0);
 143 }
 144
 145 nsCSSToken::nsCSSToken()
 146 {
 147   mType = eCSSToken_Symbol;
 148 }
 149
 150 void
 151 nsCSSToken::AppendToString(nsString& aBuffer)
 152 {
 153   switch (mType) {
 154     case eCSSToken_AtKeyword:
 155       aBuffer.Append(PRUnichar('@')); // fall through intentional
 156     case eCSSToken_Ident:
 157     case eCSSToken_WhiteSpace:
 158     case eCSSToken_Function:
 159     case eCSSToken_URL:
 160     case eCSSToken_InvalidURL:
 161     case eCSSToken_HTMLComment:
 162       aBuffer.Append(mIdent);
 163       break;
 164     case eCSSToken_Number:
 165       if (mIntegerValid) {
 166         aBuffer.AppendInt(mInteger, 10);
 167       }
 168       else {
 169         aBuffer.AppendFloat(mNumber);
 170       }
 171       break;
 172     case eCSSToken_Percentage:
 173       NS_ASSERTION(!mIntegerValid, "How did a percentage token get this set?");
 174       aBuffer.AppendFloat(mNumber * 100.0f);
 175       aBuffer.Append(PRUnichar('%')); // STRING USE WARNING: technically, this should be |AppendWithConversion|
 176       break;
 177     case eCSSToken_Dimension:
 178       if (mIntegerValid) {
 179         aBuffer.AppendInt(mInteger, 10);
 180       }
 181       else {
 182         aBuffer.AppendFloat(mNumber);
 183       }
 184       aBuffer.Append(mIdent);
 185       break;
 186     case eCSSToken_String:
 187       aBuffer.Append(mSymbol);
 188       aBuffer.Append(mIdent); // fall through intentional
 189     case eCSSToken_Symbol:
 190       aBuffer.Append(mSymbol);
 191       break;
 192     case eCSSToken_ID:
 193     case eCSSToken_Ref:
 194       aBuffer.Append(PRUnichar('#'));
 195       aBuffer.Append(mIdent);
 196       break;
 197     case eCSSToken_Includes:
 198       aBuffer.AppendLiteral("~=");
 199       break;
 200     case eCSSToken_Dashmatch:
 201       aBuffer.AppendLiteral("|=");
 202       break;
 203     case eCSSToken_Beginsmatch:
 204       aBuffer.AppendLiteral("^=");
 205       break;
 206     case eCSSToken_Endsmatch:
 207       aBuffer.AppendLiteral("$=");
 208       break;
 209     case eCSSToken_Containsmatch:
 210       aBuffer.AppendLiteral("*=");
 211       break;
 212     case eCSSToken_Error:
 213       aBuffer.Append(mSymbol);
 214       aBuffer.Append(mIdent);
 215       break;
 216     default:
 217       NS_ERROR("invalid token type");
 218       break;
 219   }
 220 }
 221
 222 nsCSSScanner::nsCSSScanner()
 223   : mInputStream(nsnull)
 224   , mReadPointer(nsnull)
 225   , mLowLevelError(NS_OK)
 226 #ifdef MOZ_SVG
 227   , mSVGMode(PR_FALSE)
 228 #endif
 229 #ifdef CSS_REPORT_PARSE_ERRORS
 230   , mError(mErrorBuf, NS_ARRAY_LENGTH(mErrorBuf), 0)
 231 #endif
 232 {
 233   MOZ_COUNT_CTOR(nsCSSScanner);
 234   if (!gLexTableSetup) {
 235     // XXX need a monitor
 236     BuildLexTable();
 237   }
 238   mPushback = mLocalPushback;
 239   mPushbackSize = NS_ARRAY_LENGTH(mLocalPushback);
 240   // No need to init the other members, since they represent state
 241   // which can get cleared.  We'll init them every time Init() is
 242   // called.
 243 }
 244
 245 nsCSSScanner::~nsCSSScanner()
 246 {
 247   MOZ_COUNT_DTOR(nsCSSScanner);
 248   Close();
 249   if (mLocalPushback != mPushback) {
 250     delete [] mPushback;
 251   }
 252 }
 253
 254 nsresult
 255 nsCSSScanner::GetLowLevelError()
 256 {
 257   return mLowLevelError;
 258 }
 259
 260 void
 261 nsCSSScanner::SetLowLevelError(nsresult aErrorCode)
 262 {
 263   NS_ASSERTION(aErrorCode != NS_OK, "SetLowLevelError() used to clear error");
 264   NS_ASSERTION(mLowLevelError == NS_OK, "there is already a low-level error");
 265   mLowLevelError = aErrorCode;
 266 }
 267
 268 #ifdef CSS_REPORT_PARSE_ERRORS
 269 #define CSS_ERRORS_PREF "layout.css.report_errors"
 270
 271 PR_STATIC_CALLBACK(int)
 272 CSSErrorsPrefChanged(const char *aPref, void *aClosure)
 273 {
 274   gReportErrors = nsContentUtils::GetBoolPref(CSS_ERRORS_PREF, PR_TRUE);
 275   return NS_OK;
 276 }
 277 #endif
 278
 279 /* static */ PRBool
 280 nsCSSScanner::InitGlobals()
 281 {
 282 #ifdef CSS_REPORT_PARSE_ERRORS
 283   if (gConsoleService && gScriptErrorFactory)
 284     return PR_TRUE;
 285
 286   nsresult rv = CallGetService(NS_CONSOLESERVICE_CONTRACTID, &gConsoleService);
 287   NS_ENSURE_SUCCESS(rv, PR_FALSE);
 288
 289   rv = CallGetClassObject(NS_SCRIPTERROR_CONTRACTID, &gScriptErrorFactory);
 290   NS_ENSURE_SUCCESS(rv, PR_FALSE);
 291   NS_ASSERTION(gConsoleService && gScriptErrorFactory,
 292                "unexpected null pointer without failure");
 293
 294   nsContentUtils::RegisterPrefCallback(CSS_ERRORS_PREF, CSSErrorsPrefChanged, nsnull);
 295   CSSErrorsPrefChanged(CSS_ERRORS_PREF, nsnull);
 296 #endif
 297   return PR_TRUE;
 298 }
 299
 300 /* static */ void
 301 nsCSSScanner::ReleaseGlobals()
 302 {
 303 #ifdef CSS_REPORT_PARSE_ERRORS
 304   nsContentUtils::UnregisterPrefCallback(CSS_ERRORS_PREF, CSSErrorsPrefChanged, nsnull);
 305   NS_IF_RELEASE(gConsoleService);
 306   NS_IF_RELEASE(gScriptErrorFactory);
 307   NS_IF_RELEASE(gStringBundle);
 308 #endif
 309 }
 310
 311 void
 312 nsCSSScanner::Init(nsIUnicharInputStream* aInput,
 313                    const PRUnichar * aBuffer, PRUint32 aCount,
 314                    nsIURI* aURI, PRUint32 aLineNumber)
 315 {
 316   NS_PRECONDITION(!mInputStream, "Should not have an existing input stream!");
 317   NS_PRECONDITION(!mReadPointer, "Should not have an existing input buffer!");
 318
 319   // Read from stream via my own buffer
 320   if (aInput) {
 321     NS_PRECONDITION(!aBuffer, "Shouldn't have both input and buffer!");
 322     NS_PRECONDITION(aCount == 0, "Shouldn't have count with a stream");
 323     mInputStream = aInput;
 324     mReadPointer = mBuffer;
 325     mCount = 0;
 326   } else {
 327     NS_PRECONDITION(aBuffer, "Either aInput or aBuffer must be set");
 328     // Read directly from the provided buffer
 329     mInputStream = nsnull;
 330     mReadPointer = aBuffer;
 331     mCount = aCount;
 332   }
 333
 334 #ifdef CSS_REPORT_PARSE_ERRORS
 335   // If aURI is the same as mURI, no need to reget mFileName -- it
 336   // shouldn't have changed.
 337   if (aURI != mURI) {
 338     mURI = aURI;
 339     if (aURI) {
 340       aURI->GetSpec(mFileName);
 341     } else {
 342       mFileName.Adopt(NS_strdup("from DOM"));
 343     }
 344   }
 345 #endif // CSS_REPORT_PARSE_ERRORS
 346   mLineNumber = aLineNumber;
 347
 348   // Reset variables that we use to keep track of our progress through the input
 349   mOffset = 0;
 350   mPushbackCount = 0;
 351   mLowLevelError = NS_OK;
 352
 353 #ifdef CSS_REPORT_PARSE_ERRORS
 354   mColNumber = 0;
 355 #endif
 356 }
 357
 358 #ifdef CSS_REPORT_PARSE_ERRORS
 359
 360 // @see REPORT_UNEXPECTED_EOF in nsCSSParser.cpp
 361 #define REPORT_UNEXPECTED_EOF(lf_) \
 362   ReportUnexpectedEOF(#lf_)
 363
 364 void
 365 nsCSSScanner::AddToError(const nsSubstring& aErrorText)
 366 {
 367   if (mError.IsEmpty()) {
 368     mErrorLineNumber = mLineNumber;
 369     mErrorColNumber = mColNumber;
 370     mError = aErrorText;
 371   } else {
 372     mError.Append(NS_LITERAL_STRING("  ") + aErrorText);
 373   }
 374 }
 375
 376 void
 377 nsCSSScanner::ClearError()
 378 {
 379   mError.Truncate();
 380 }
 381
 382 void
 383 nsCSSScanner::OutputError()
 384 {
 385   if (mError.IsEmpty()) return;
 386
 387   // Log it to the Error console
 388
 389   if (InitGlobals() && gReportErrors) {
 390     nsresult rv;
 391     nsCOMPtr<nsIScriptError> errorObject =
 392       do_CreateInstance(gScriptErrorFactory, &rv);
 393     if (NS_SUCCEEDED(rv)) {
 394       rv = errorObject->Init(mError.get(),
 395                              NS_ConvertUTF8toUTF16(mFileName).get(),
 396                              EmptyString().get(),
 397                              mErrorLineNumber,
 398                              mErrorColNumber,
 399                              nsIScriptError::warningFlag,
 400                              "CSS Parser");
 401       if (NS_SUCCEEDED(rv))
 402         gConsoleService->LogMessage(errorObject);
 403     }
 404   }
 405   ClearError();
 406 }
 407
 408 static PRBool
 409 InitStringBundle()
 410 {
 411   if (gStringBundle)
 412     return PR_TRUE;
 413
 414   nsCOMPtr<nsIStringBundleService> sbs =
 415     do_GetService(NS_STRINGBUNDLE_CONTRACTID);
 416   if (!sbs)
 417     return PR_FALSE;
 418
 419   nsresult rv =
 420     sbs->CreateBundle("chrome://global/locale/css.properties", &gStringBundle);
 421   if (NS_FAILED(rv)) {
 422     gStringBundle = nsnull;
 423     return PR_FALSE;
 424   }
 425
 426   return PR_TRUE;
 427 }
 428
 429 #define ENSURE_STRINGBUNDLE \
 430   PR_BEGIN_MACRO if (!InitStringBundle()) return; PR_END_MACRO
 431
 432 // aMessage must take no parameters
 433 void nsCSSScanner::ReportUnexpected(const char* aMessage)
 434 {
 435   ENSURE_STRINGBUNDLE;
 436
 437   nsXPIDLString str;
 438   gStringBundle->GetStringFromName(NS_ConvertASCIItoUTF16(aMessage).get(),
 439                                    getter_Copies(str));
 440   AddToError(str);
 441 }
 442
 443 void
 444 nsCSSScanner::ReportUnexpectedParams(const char* aMessage,
 445                                      const PRUnichar **aParams,
 446                                      PRUint32 aParamsLength)
 447 {
 448   NS_PRECONDITION(aParamsLength > 0, "use the non-params version");
 449   ENSURE_STRINGBUNDLE;
 450
 451   nsXPIDLString str;
 452   gStringBundle->FormatStringFromName(NS_ConvertASCIItoUTF16(aMessage).get(),
 453                                       aParams, aParamsLength,
 454                                       getter_Copies(str));
 455   AddToError(str);
 456 }
 457
 458 // aLookingFor is a plain string, not a format string
 459 void
 460 nsCSSScanner::ReportUnexpectedEOF(const char* aLookingFor)
 461 {
 462   ENSURE_STRINGBUNDLE;
 463
 464   nsXPIDLString innerStr;
 465   gStringBundle->GetStringFromName(NS_ConvertASCIItoUTF16(aLookingFor).get(),
 466                                    getter_Copies(innerStr));
 467
 468   const PRUnichar *params[] = {
 469     innerStr.get()
 470   };
 471   nsXPIDLString str;
 472   gStringBundle->FormatStringFromName(NS_LITERAL_STRING("PEUnexpEOF2").get(),
 473                                       params, NS_ARRAY_LENGTH(params),
 474                                       getter_Copies(str));
 475   AddToError(str);
 476 }
 477
 478 // aLookingFor is a single character
 479 void
 480 nsCSSScanner::ReportUnexpectedEOF(PRUnichar aLookingFor)
 481 {
 482   ENSURE_STRINGBUNDLE;
 483
 484   const PRUnichar lookingForStr[] = {
 485     PRUnichar('\''), aLookingFor, PRUnichar('\''), PRUnichar(0)
 486   };
 487   const PRUnichar *params[] = { lookingForStr };
 488   nsXPIDLString str;
 489   gStringBundle->FormatStringFromName(NS_LITERAL_STRING("PEUnexpEOF2").get(),
 490                                       params, NS_ARRAY_LENGTH(params),
 491                                       getter_Copies(str));
 492   AddToError(str);
 493 }
 494
 495 // aMessage must take 1 parameter (for the string representation of the
 496 // unexpected token)
 497 void
 498 nsCSSScanner::ReportUnexpectedToken(nsCSSToken& tok,
 499                                     const char *aMessage)
 500 {
 501   ENSURE_STRINGBUNDLE;
 502
 503   nsAutoString tokenString;
 504   tok.AppendToString(tokenString);
 505
 506   const PRUnichar *params[] = {
 507     tokenString.get()
 508   };
 509
 510   ReportUnexpectedParams(aMessage, params, NS_ARRAY_LENGTH(params));
 511 }
 512
 513 // aParams's first entry must be null, and we'll fill in the token
 514 void
 515 nsCSSScanner::ReportUnexpectedTokenParams(nsCSSToken& tok,
 516                                           const char* aMessage,
 517                                           const PRUnichar **aParams,
 518                                           PRUint32 aParamsLength)
 519 {
 520   NS_PRECONDITION(aParamsLength > 1, "use the non-params version");
 521   NS_PRECONDITION(aParams[0] == nsnull, "first param should be empty");
 522
 523   ENSURE_STRINGBUNDLE;
 524
 525   nsAutoString tokenString;
 526   tok.AppendToString(tokenString);
 527   aParams[0] = tokenString.get();
 528
 529   ReportUnexpectedParams(aMessage, aParams, aParamsLength);
 530 }
 531
 532 #else
 533
 534 #define REPORT_UNEXPECTED_EOF(lf_)
 535
 536 #endif // CSS_REPORT_PARSE_ERRORS
 537
 538 void
 539 nsCSSScanner::Close()
 540 {
 541   mInputStream = nsnull;
 542   mReadPointer = nsnull;
 543
 544   // Clean things up so we don't hold on to memory if our parser gets recycled.
 545 #ifdef CSS_REPORT_PARSE_ERRORS
 546   mFileName.Truncate();
 547   mURI = nsnull;
 548   mError.Truncate();
 549 #endif
 550   if (mPushback != mLocalPushback) {
 551     delete [] mPushback;
 552     mPushback = mLocalPushback;
 553     mPushbackSize = NS_ARRAY_LENGTH(mLocalPushback);
 554   }
 555 }
 556
 557 #ifdef CSS_REPORT_PARSE_ERRORS
 558 #define TAB_STOP_WIDTH 8
 559 #endif
 560
 561 PRBool
 562 nsCSSScanner::EnsureData()
 563 {
 564   if (mOffset < mCount)
 565     return PR_TRUE;
 566
 567   if (!mInputStream)
 568     return PR_FALSE;
 569
 570   mOffset = 0;
 571   nsresult rv = mInputStream->Read(mBuffer, CSS_BUFFER_SIZE, &mCount);
 572
 573   if (NS_FAILED(rv)) {
 574     mCount = 0;
 575     SetLowLevelError(rv);
 576     return PR_FALSE;
 577   }
 578
 579   return mCount > 0;
 580 }
 581
 582 // Returns -1 on error or eof
 583 PRInt32
 584 nsCSSScanner::Read()
 585 {
 586   PRInt32 rv;
 587   if (0 < mPushbackCount) {
 588     rv = PRInt32(mPushback[--mPushbackCount]);
 589   } else {
 590     if (mOffset == mCount && !EnsureData()) {
 591       return -1;
 592     }
 593     rv = PRInt32(mReadPointer[mOffset++]);
 594     // There are four types of newlines in CSS: "\r", "\n", "\r\n", and "\f".
 595     // To simplify dealing with newlines, they are all normalized to "\n" here
 596     if (rv == '\r') {
 597       if (EnsureData() && mReadPointer[mOffset] == '\n') {
 598         mOffset++;
 599       }
 600       rv = '\n';
 601     } else if (rv == '\f') {
 602       rv = '\n';
 603     }
 604     if (rv == '\n') {
 605       // 0 is a magical line number meaning that we don't know (i.e., script)
 606       if (mLineNumber != 0)
 607         ++mLineNumber;
 608 #ifdef CSS_REPORT_PARSE_ERRORS
 609       mColNumber = 0;
 610 #endif
 611     }
 612 #ifdef CSS_REPORT_PARSE_ERRORS
 613     else if (rv == '\t') {
 614       mColNumber = ((mColNumber - 1 + TAB_STOP_WIDTH) / TAB_STOP_WIDTH)
 615                    * TAB_STOP_WIDTH;
 616     } else if (rv != '\n') {
 617       mColNumber++;
 618     }
 619 #endif
 620   }
 621 //printf("Read => %x\n", rv);
 622   return rv;
 623 }
 624
 625 PRInt32
 626 nsCSSScanner::Peek()
 627 {
 628   if (0 == mPushbackCount) {
 629     PRInt32 ch = Read();
 630     if (ch < 0) {
 631       return -1;
 632     }
 633     mPushback[0] = PRUnichar(ch);
 634     mPushbackCount++;
 635   }
 636 //printf("Peek => %x\n", mLookAhead);
 637   return PRInt32(mPushback[mPushbackCount - 1]);
 638 }
 639
 640 void
 641 nsCSSScanner::Pushback(PRUnichar aChar)
 642 {
 643   if (mPushbackCount == mPushbackSize) { // grow buffer
 644     PRUnichar*  newPushback = new PRUnichar[mPushbackSize + 4];
 645     if (nsnull == newPushback) {
 646       return;
 647     }
 648     mPushbackSize += 4;
 649     memcpy(newPushback, mPushback, sizeof(PRUnichar) * mPushbackCount);
 650     if (mPushback != mLocalPushback) {
 651       delete [] mPushback;
 652     }
 653     mPushback = newPushback;
 654   }
 655   mPushback[mPushbackCount++] = aChar;
 656 }
 657
 658 PRBool
 659 nsCSSScanner::LookAhead(PRUnichar aChar)
 660 {
 661   PRInt32 ch = Read();
 662   if (ch < 0) {
 663     return PR_FALSE;
 664   }
 665   if (ch == aChar) {
 666     return PR_TRUE;
 667   }
 668   Pushback(ch);
 669   return PR_FALSE;
 670 }
 671
 672 PRBool
 673 nsCSSScanner::EatWhiteSpace()
 674 {
 675   PRBool eaten = PR_FALSE;
 676   for (;;) {
 677     PRInt32 ch = Read();
 678     if (ch < 0) {
 679       break;
 680     }
 681     if ((ch == ' ') || (ch == '\n') || (ch == '\t')) {
 682       eaten = PR_TRUE;
 683       continue;
 684     }
 685     Pushback(ch);
 686     break;
 687   }
 688   return eaten;
 689 }
 690
 691 PRBool
 692 nsCSSScanner::EatNewline()
 693 {
 694   PRInt32 ch = Read();
 695   if (ch < 0) {
 696     return PR_FALSE;
 697   }
 698   PRBool eaten = PR_FALSE;
 699   if (ch == '\n') {
 700     eaten = PR_TRUE;
 701   } else {
 702     Pushback(ch);
 703   }
 704   return eaten;
 705 }
 706
 707 PRBool
 708 nsCSSScanner::Next(nsCSSToken& aToken)
 709 {
 710   PRInt32 ch = Read();
 711   if (ch < 0) {
 712     return PR_FALSE;
 713   }
 714
 715   // IDENT
 716   if (StartsIdent(ch, Peek()))
 717     return ParseIdent(ch, aToken);
 718
 719   // AT_KEYWORD
 720   if (ch == '@') {
 721     PRInt32 nextChar = Read();
 722     if (nextChar >= 0) {
 723       PRInt32 followingChar = Peek();
 724       Pushback(nextChar);
 725       if (StartsIdent(nextChar, followingChar))
 726         return ParseAtKeyword(ch, aToken);
 727     }
 728   }
 729
 730   // NUMBER or DIM
 731   if ((ch == '.') || (ch == '+') || (ch == '-')) {
 732     PRInt32 nextChar = Peek();
 733     if (IsDigit(nextChar)) {
 734       return ParseNumber(ch, aToken);
 735     }
 736     else if (('.' == nextChar) && ('.' != ch)) {
 737       nextChar = Read();
 738       PRInt32 followingChar = Peek();
 739       Pushback(nextChar);
 740       if (IsDigit(followingChar))
 741         return ParseNumber(ch, aToken);
 742     }
 743   }
 744   if (IsDigit(ch)) {
 745     return ParseNumber(ch, aToken);
 746   }
 747
 748   // ID
 749   if (ch == '#') {
 750     return ParseRef(ch, aToken);
 751   }
 752
 753   // STRING
 754   if ((ch == '"') || (ch == '\'')) {
 755     return ParseString(ch, aToken);
 756   }
 757
 758   // WS
 759   if (IsWhitespace(ch)) {
 760     aToken.mType = eCSSToken_WhiteSpace;
 761     aToken.mIdent.Assign(PRUnichar(ch));
 762     (void) EatWhiteSpace();
 763     return PR_TRUE;
 764   }
 765   if (ch == '/') {
 766     PRInt32 nextChar = Peek();
 767     if (nextChar == '*') {
 768       (void) Read();
 769 #if 0
 770       // If we change our storage data structures such that comments are
 771       // stored (for Editor), we should reenable this code, condition it
 772       // on being in editor mode, and apply glazou's patch from bug
 773       // 60290.
 774       aToken.mIdent.SetCapacity(2);
 775       aToken.mIdent.Assign(PRUnichar(ch));
 776       aToken.mIdent.Append(PRUnichar(nextChar));
 777       return ParseCComment(aToken);
 778 #endif
 779       return SkipCComment() && Next(aToken);
 780     }
 781   }
 782   if (ch == '<') {  // consume HTML comment tags
 783     if (LookAhead('!')) {
 784       if (LookAhead('-')) {
 785         if (LookAhead('-')) {
 786           aToken.mType = eCSSToken_HTMLComment;
 787           aToken.mIdent.AssignLiteral("<!--");
 788           return PR_TRUE;
 789         }
 790         Pushback('-');
 791       }
 792       Pushback('!');
 793     }
 794   }
 795   if (ch == '-') {  // check for HTML comment end
 796     if (LookAhead('-')) {
 797       if (LookAhead('>')) {
 798         aToken.mType = eCSSToken_HTMLComment;
 799         aToken.mIdent.AssignLiteral("-->");
 800         return PR_TRUE;
 801       }
 802       Pushback('-');
 803     }
 804   }
 805
 806   // INCLUDES ("~=") and DASHMATCH ("|=")
 807   if (( ch == '|' ) || ( ch == '~' ) || ( ch == '^' ) ||
 808       ( ch == '$' ) || ( ch == '*' )) {
 809     PRInt32 nextChar = Read();
 810     if ( nextChar == '=' ) {
 811       if (ch == '~') {
 812         aToken.mType = eCSSToken_Includes;
 813       }
 814       else if (ch == '|') {
 815         aToken.mType = eCSSToken_Dashmatch;
 816       }
 817       else if (ch == '^') {
 818         aToken.mType = eCSSToken_Beginsmatch;
 819       }
 820       else if (ch == '$') {
 821         aToken.mType = eCSSToken_Endsmatch;
 822       }
 823       else if (ch == '*') {
 824         aToken.mType = eCSSToken_Containsmatch;
 825       }
 826       return PR_TRUE;
 827     } else if (nextChar >= 0) {
 828       Pushback(nextChar);
 829     }
 830   }
 831   aToken.mType = eCSSToken_Symbol;
 832   aToken.mSymbol = ch;
 833   return PR_TRUE;
 834 }
 835
 836 PRBool
 837 nsCSSScanner::NextURL(nsCSSToken& aToken)
 838 {
 839   PRInt32 ch = Read();
 840   if (ch < 0) {
 841     return PR_FALSE;
 842   }
 843
 844   // STRING
 845   if ((ch == '"') || (ch == '\'')) {
 846     return ParseString(ch, aToken);
 847   }
 848
 849   // WS
 850   if (IsWhitespace(ch)) {
 851     aToken.mType = eCSSToken_WhiteSpace;
 852     aToken.mIdent.Assign(PRUnichar(ch));
 853     (void) EatWhiteSpace();
 854     return PR_TRUE;
 855   }
 856   if (ch == '/') {
 857     PRInt32 nextChar = Peek();
 858     if (nextChar == '*') {
 859       (void) Read();
 860 #if 0
 861       // If we change our storage data structures such that comments are
 862       // stored (for Editor), we should reenable this code, condition it
 863       // on being in editor mode, and apply glazou's patch from bug
 864       // 60290.
 865       aToken.mIdent.SetCapacity(2);
 866       aToken.mIdent.Assign(PRUnichar(ch));
 867       aToken.mIdent.Append(PRUnichar(nextChar));
 868       return ParseCComment(aToken);
 869 #endif
 870       return SkipCComment() && Next(aToken);
 871     }
 872   }
 873
 874   // Process a url lexical token. A CSS1 url token can contain
 875   // characters beyond identifier characters (e.g. '/', ':', etc.)
 876   // Because of this the normal rules for tokenizing the input don't
 877   // apply very well. To simplify the parser and relax some of the
 878   // requirements on the scanner we parse url's here. If we find a
 879   // malformed URL then we emit a token of type "InvalidURL" so that
 880   // the CSS1 parser can ignore the invalid input. We attempt to eat
 881   // the right amount of input data when an invalid URL is presented.
 882
 883   aToken.mType = eCSSToken_InvalidURL;
 884   nsString& ident = aToken.mIdent;
 885   ident.SetLength(0);
 886
 887   if (ch == ')') {
 888     Pushback(ch);
 889     // empty url spec; just get out of here
 890     aToken.mType = eCSSToken_URL;
 891   } else {
 892     // start of a non-quoted url
 893     Pushback(ch);
 894     PRBool ok = PR_TRUE;
 895     for (;;) {
 896       ch = Read();
 897       if (ch < 0) break;
 898       if (ch == CSS_ESCAPE) {
 899         ParseAndAppendEscape(ident);
 900       } else if ((ch == '"') || (ch == '\'') || (ch == '(')) {
 901         // This is an invalid URL spec
 902         ok = PR_FALSE;
 903       } else if (IsWhitespace(ch)) {
 904         // Whitespace is allowed at the end of the URL
 905         (void) EatWhiteSpace();
 906         if (LookAhead(')')) {
 907           Pushback(')');  // leave the closing symbol
 908           // done!
 909           break;
 910         }
 911         // Whitespace is followed by something other than a
 912         // ")". This is an invalid url spec.
 913         ok = PR_FALSE;
 914       } else if (ch == ')') {
 915         Pushback(ch);
 916         // All done
 917         break;
 918       } else {
 919         // A regular url character.
 920         ident.Append(PRUnichar(ch));
 921       }
 922     }
 923
 924     // If the result of the above scanning is ok then change the token
 925     // type to a useful one.
 926     if (ok) {
 927       aToken.mType = eCSSToken_URL;
 928     }
 929   }
 930   return PR_TRUE;
 931 }
 932
 933
 934 void
 935 nsCSSScanner::ParseAndAppendEscape(nsString& aOutput)
 936 {
 937   PRInt32 ch = Peek();
 938   if (ch < 0) {
 939     aOutput.Append(CSS_ESCAPE);
 940     return;
 941   }
 942   if (IsHexDigit(ch)) {
 943     PRInt32 rv = 0;
 944     int i;
 945     for (i = 0; i < 6; i++) { // up to six digits
 946       ch = Read();
 947       if (ch < 0) {
 948         // Whoops: error or premature eof
 949         break;
 950       }
 951       if (!IsHexDigit(ch) && !IsWhitespace(ch)) {
 952         Pushback(ch);
 953         break;
 954       } else if (IsHexDigit(ch)) {
 955         if (IsDigit(ch)) {
 956           rv = rv * 16 + (ch - '0');
 957         } else {
 958           // Note: c&7 just keeps the low three bits which causes
 959           // upper and lower case alphabetics to both yield their
 960           // "relative to 10" value for computing the hex value.
 961           rv = rv * 16 + ((ch & 0x7) + 9);
 962         }
 963       } else {
 964         NS_ASSERTION(IsWhitespace(ch), "bad control flow");
 965         // single space ends escape
 966         break;
 967       }
 968     }
 969     if (6 == i) { // look for trailing whitespace and eat it
 970       ch = Peek();
 971       if (IsWhitespace(ch)) {
 972         ch = Read();
 973       }
 974     }
 975     NS_ASSERTION(rv >= 0, "How did rv become negative?");
 976     if (rv > 0) {
 977       AppendUCS4ToUTF16(ENSURE_VALID_CHAR(rv), aOutput);
 978     }
 979     return;
 980   } else {
 981     // "Any character except a hexidecimal digit can be escaped to
 982     // remove its special meaning by putting a backslash in front"
 983     // -- CSS1 spec section 7.1
 984     if (!EatNewline()) { // skip escaped newline
 985       (void) Read();
 986       if (ch > 0) {
 987         aOutput.Append(ch);
 988       }
 989     }
 990     return;
 991   }
 992 }
 993
 994 /**
 995  * Gather up the characters in an identifier. The identfier was
 996  * started by "aChar" which will be appended to aIdent. The result
 997  * will be aIdent with all of the identifier characters appended
 998  * until the first non-identifier character is seen. The termination
 999  * character is unread for the future re-reading.
1000  */
1001 PRBool
1002 nsCSSScanner::GatherIdent(PRInt32 aChar, nsString& aIdent)
1003 {
1004   if (aChar == CSS_ESCAPE) {
1005     ParseAndAppendEscape(aIdent);
1006   }
1007   else if (0 < aChar) {
1008     aIdent.Append(aChar);
1009   }
1010   for (;;) {
1011     // If nothing in pushback, first try to get as much as possible in one go
1012     if (!mPushbackCount && EnsureData()) {
1013       // See how much we can consume and append in one go
1014       PRUint32 n = mOffset;
1015       // Count number of Ident characters that can be processed
1016       while (n < mCount && IsIdent(mReadPointer[n])) {
1017         ++n;
1018       }
1019       // Add to the token what we have so far
1020       if (n > mOffset) {
1021 #ifdef CSS_REPORT_PARSE_ERRORS
1022         mColNumber += n - mOffset;
1023 #endif
1024         aIdent.Append(&mReadPointer[mOffset], n - mOffset);
1025         mOffset = n;
1026       }
1027     }
1028
1029     aChar = Read();
1030     if (aChar < 0) break;
1031     if (aChar == CSS_ESCAPE) {
1032       ParseAndAppendEscape(aIdent);
1033     } else if (IsIdent(aChar)) {
1034       aIdent.Append(PRUnichar(aChar));
1035     } else {
1036       Pushback(aChar);
1037       break;
1038     }
1039   }
1040   return PR_TRUE;
1041 }
1042
1043 PRBool
1044 nsCSSScanner::ParseRef(PRInt32 aChar, nsCSSToken& aToken)
1045 {
1046   aToken.mIdent.SetLength(0);
1047   aToken.mType = eCSSToken_Ref;
1048   PRInt32 ch = Read();
1049   if (ch < 0) {
1050     return PR_FALSE;
1051   }
1052   if (IsIdent(ch) || ch == CSS_ESCAPE) {
1053     // First char after the '#' is a valid ident char (or an escape),
1054     // so it makes sense to keep going
1055     if (StartsIdent(ch, Peek())) {
1056       aToken.mType = eCSSToken_ID;
1057     }
1058     return GatherIdent(ch, aToken.mIdent);
1059   }
1060
1061   // No ident chars after the '#'.  Just unread |ch| and get out of here.
1062   Pushback(ch);
1063   return PR_TRUE;
1064 }
1065
1066 PRBool
1067 nsCSSScanner::ParseIdent(PRInt32 aChar, nsCSSToken& aToken)
1068 {
1069   nsString& ident = aToken.mIdent;
1070   ident.SetLength(0);
1071   if (!GatherIdent(aChar, ident)) {
1072     return PR_FALSE;
1073   }
1074
1075   nsCSSTokenType tokenType = eCSSToken_Ident;
1076   // look for functions (ie: "ident(")
1077   if (PRUnichar('(') == PRUnichar(Peek())) { // this is a function definition
1078     tokenType = eCSSToken_Function;
1079   }
1080
1081   aToken.mType = tokenType;
1082   return PR_TRUE;
1083 }
1084
1085 PRBool
1086 nsCSSScanner::ParseAtKeyword(PRInt32 aChar, nsCSSToken& aToken)
1087 {
1088   aToken.mIdent.SetLength(0);
1089   aToken.mType = eCSSToken_AtKeyword;
1090   return GatherIdent(0, aToken.mIdent);
1091 }
1092
1093 PRBool
1094 nsCSSScanner::ParseNumber(PRInt32 c, nsCSSToken& aToken)
1095 {
1096   nsString& ident = aToken.mIdent;
1097   ident.SetLength(0);
1098   PRBool gotDot = (c == '.');
1099   aToken.mHasSign = (c == '+' || c == '-');
1100   if (c != '+') {
1101     ident.Append(PRUnichar(c));
1102   }
1103
1104   // Gather up characters that make up the number
1105   PRBool gotE = PR_FALSE;
1106   for (;;) {
1107     c = Read();
1108     if (c < 0) break;
1109     if (!gotDot  && !gotE && (c == '.') &&
1110         IsDigit(Peek())) {
1111       gotDot = PR_TRUE;
1112 #ifdef MOZ_SVG
1113     } else if (!gotE && (c == 'e' || c == 'E')) {
1114       if (!IsSVGMode()) {
1115         break;
1116       }
1117       PRInt32 nextChar = Peek();
1118       PRInt32 sign = 0;
1119       if (nextChar == '-' || nextChar == '+') {
1120         sign = Read();
1121         nextChar = Peek();
1122       }
1123       if (IsDigit(nextChar)) {
1124         gotE = PR_TRUE;
1125         if (sign) {
1126           ident.Append(PRUnichar(c));
1127           c = sign;
1128         }
1129       } else {
1130         if (sign) {
1131           Pushback(sign);
1132         }
1133         break;
1134       }
1135 #endif
1136     } else if (!IsDigit(c)) {
1137       break;
1138     }
1139     ident.Append(PRUnichar(c));
1140   }
1141
1142   // Convert number to floating point
1143   nsCSSTokenType type = eCSSToken_Number;
1144   PRInt32 ec;
1145   float value = ident.ToFloat(&ec);
1146
1147   // Set mIntegerValid for all cases (except %, below) because we need
1148   // it for the "2n" in :nth-child(2n).
1149   aToken.mIntegerValid = PR_FALSE;
1150   if (!gotDot && !gotE) {
1151     aToken.mInteger = ident.ToInteger(&ec);
1152     aToken.mIntegerValid = PR_TRUE;
1153   }
1154   ident.SetLength(0);
1155
1156   // Look at character that terminated the number
1157   if (c >= 0) {
1158     if (StartsIdent(c, Peek())) {
1159       if (!GatherIdent(c, ident)) {
1160         return PR_FALSE;
1161       }
1162       type = eCSSToken_Dimension;
1163     } else if ('%' == c) {
1164       type = eCSSToken_Percentage;
1165       value = value / 100.0f;
1166       aToken.mIntegerValid = PR_FALSE;
1167     } else {
1168       // Put back character that stopped numeric scan
1169       Pushback(c);
1170     }
1171   }
1172   aToken.mNumber = value;
1173   aToken.mType = type;
1174   return PR_TRUE;
1175 }
1176
1177 PRBool
1178 nsCSSScanner::SkipCComment()
1179 {
1180   for (;;) {
1181     PRInt32 ch = Read();
1182     if (ch < 0) break;
1183     if (ch == '*') {
1184       if (LookAhead('/')) {
1185         return PR_TRUE;
1186       }
1187     }
1188   }
1189
1190   REPORT_UNEXPECTED_EOF(PECommentEOF);
1191   return PR_FALSE;
1192 }
1193
1194 #if 0
1195 PRBool
1196 nsCSSScanner::ParseCComment(nsCSSToken& aToken)
1197 {
1198   nsString& ident = aToken.mIdent;
1199   for (;;) {
1200     PRInt32 ch = Read();
1201     if (ch < 0) break;
1202     if (ch == '*') {
1203       if (LookAhead('/')) {
1204         ident.Append(PRUnichar(ch));
1205         ident.Append(PRUnichar('/'));
1206         break;
1207       }
1208     }
1209 #ifdef COLLECT_WHITESPACE
1210     ident.Append(PRUnichar(ch));
1211 #endif
1212   }
1213   aToken.mType = eCSSToken_WhiteSpace;
1214   return PR_TRUE;
1215 }
1216 #endif
1217
1218 #if 0
1219 PRBool
1220 nsCSSScanner::ParseEOLComment(nsCSSToken& aToken)
1221 {
1222   nsString& ident = aToken.mIdent;
1223   ident.SetLength(0);
1224   for (;;) {
1225     if (EatNewline()) {
1226       break;
1227     }
1228     PRInt32 ch = Read();
1229     if (ch < 0) {
1230       break;
1231     }
1232 #ifdef COLLECT_WHITESPACE
1233     ident.Append(PRUnichar(ch));
1234 #endif
1235   }
1236   aToken.mType = eCSSToken_WhiteSpace;
1237   return PR_TRUE;
1238 }
1239 #endif // 0
1240
1241 PRBool
1242 nsCSSScanner::ParseString(PRInt32 aStop, nsCSSToken& aToken)
1243 {
1244   aToken.mIdent.SetLength(0);
1245   aToken.mType = eCSSToken_String;
1246   aToken.mSymbol = PRUnichar(aStop); // remember how it's quoted
1247   for (;;) {
1248     // If nothing in pushback, first try to get as much as possible in one go
1249     if (!mPushbackCount && EnsureData()) {
1250       // See how much we can consume and append in one go
1251       PRUint32 n = mOffset;
1252       // Count number of characters that can be processed
1253       for (;n < mCount; ++n) {
1254         PRUnichar nextChar = mReadPointer[n];
1255         if ((nextChar == aStop) || (nextChar == CSS_ESCAPE) ||
1256             (nextChar == '\n') || (nextChar == '\r') || (nextChar == '\f')) {
1257           break;
1258         }
1259 #ifdef CSS_REPORT_PARSE_ERRORS
1260         if (nextChar == '\t') {
1261           mColNumber = ((mColNumber - 1 + TAB_STOP_WIDTH) / TAB_STOP_WIDTH)
1262                        * TAB_STOP_WIDTH;
1263         } else {
1264           ++mColNumber;
1265         }
1266 #endif
1267       }
1268       // Add to the token what we have so far
1269       if (n > mOffset) {
1270         aToken.mIdent.Append(&mReadPointer[mOffset], n - mOffset);
1271         mOffset = n;
1272       }
1273     }
1274     PRInt32 ch = Read();
1275     if (ch < 0 || ch == aStop) {
1276       break;
1277     }
1278     if (ch == '\n') {
1279       aToken.mType = eCSSToken_Error;
1280 #ifdef CSS_REPORT_PARSE_ERRORS
1281       ReportUnexpectedToken(aToken, "SEUnterminatedString");
1282 #endif
1283       break;
1284     }
1285     if (ch == CSS_ESCAPE) {
1286       ParseAndAppendEscape(aToken.mIdent);
1287     } else {
1288       aToken.mIdent.Append(ch);
1289     }
1290   }
1291   return PR_TRUE;
1292 }