sw/source/filter/html/parcss1.cxx

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /*
   3  * This file is part of the LibreOffice project.
   4  *
   5  * This Source Code Form is subject to the terms of the Mozilla Public
   6  * License, v. 2.0. If a copy of the MPL was not distributed with this
   7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
   8  *
   9  * This file incorporates work covered by the following license notice:
  10  *
  11  *   Licensed to the Apache Software Foundation (ASF) under one or more
  12  *   contributor license agreements. See the NOTICE file distributed
  13  *   with this work for additional information regarding copyright
  14  *   ownership. The ASF licenses this file to you under the Apache
  15  *   License, Version 2.0 (the "License"); you may not use this file
  16  *   except in compliance with the License. You may obtain a copy of
  17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
  18  */
  19
  20 #include <ctype.h>
  21 #include <stdlib.h>
  22 #include <limits.h>
  23 #include <rtl/ustrbuf.hxx>
  24 #include <vcl/svapp.hxx>
  25 #include <svtools/htmltokn.h>
  26 #include <comphelper/string.hxx>
  27 #include "css1kywd.hxx"
  28 #include "parcss1.hxx"
  29
// Loop check: to avoid endless loops, every loop verifies that the
// input position has advanced since the previous iteration.
#define LOOP_CHECK

#ifdef LOOP_CHECK

// Declares the local variable nOldInPos that remembers the input position
// seen by the previous LOOP_CHECK_CHECK; SAL_MAX_INT32 is the initial value.
#define LOOP_CHECK_DECL \
    sal_Int32 nOldInPos = SAL_MAX_INT32;
// Resets nOldInPos to its initial value so the same variable can guard
// another loop in the same function.
#define LOOP_CHECK_RESTART \
    nOldInPos = SAL_MAX_INT32;
// Asserts (with message "where") that nInPos changed since the last check
// or that cNextCh is the EOF marker. If neither is the case it executes
// "break", otherwise it stores the current nInPos in nOldInPos.
// Because it expands to a "break", it must be placed directly in the body
// of the loop it guards (not inside a nested switch).
#define LOOP_CHECK_CHECK( where ) \
    OSL_ENSURE( nOldInPos!=nInPos || cNextCh==(sal_Unicode)EOF, where );    \
    if( nOldInPos==nInPos && cNextCh!=(sal_Unicode)EOF )                    \
        break;                                                              \
    else                                                                    \
        nOldInPos = nInPos;

#else

// Loop checking disabled: all three macros expand to nothing.
#define LOOP_CHECK_DECL
#define LOOP_CHECK_RESTART
#define LOOP_CHECK_CHECK( where )

#endif

// Passed as constructor argument to the OUStringBuffer that collects
// STRING tokens in GetNextToken().
const sal_Int32 MAX_LEN = 1024;
  57
  58 void CSS1Parser::InitRead( const OUString& rIn )
  59 {
  60     nlLineNr = 0;
  61     nlLinePos = 0;
  62
  63     bWhiteSpace = true; // Wenn noch nichts gelesen wurde ist das wie WS
  64     bEOF = false;
  65     eState = CSS1_PAR_WORKING;
  66     nValue = 0.;
  67
  68     aIn = rIn;
  69     nInPos = 0;
  70     cNextCh = GetNextChar();
  71     nToken = GetNextToken();
  72 }
  73
  74 sal_Unicode CSS1Parser::GetNextChar()
  75 {
  76     if( nInPos >= aIn.getLength() )
  77     {
  78         bEOF = true;
  79         return (sal_Unicode)EOF;
  80     }
  81
  82     sal_Unicode c = aIn[nInPos];
  83     nInPos++;
  84
  85     if( c == '\n' )
  86     {
  87         ++nlLineNr;
  88         nlLinePos = 1;
  89     }
  90     else
  91         ++nlLinePos;
  92
  93     return c;
  94 }
  95
// This function implements the scanner for CSS1 described in

//      http://www.w3.org/pub/WWW/TR/WD-css1.html
// resp. http://www.w3.org/pub/WWW/TR/WD-css1-960220.html

// It is a direct implementation of the Lex grammar
// described there.
 103
 104 CSS1Token CSS1Parser::GetNextToken()
 105 {
 106     CSS1Token nRet = CSS1_NULL;
 107     aToken.clear();
 108
 109     do {
 110         // Merken, ob davor White-Space gelesen wurde
 111         bool bPrevWhiteSpace = bWhiteSpace;
 112         bWhiteSpace = false;
 113
 114         bool bNextCh = true;
 115         switch( cNextCh )
 116         {
 117         case '/': // COMMENT | '/'
 118             {
 119                 cNextCh = GetNextChar();
 120                 if( '*' == cNextCh )
 121                 {
 122                     // COMMENT
 123                     cNextCh = GetNextChar();
 124
 125                     bool bAsterisk = false;
 126                     while( !(bAsterisk && '/'==cNextCh) && !IsEOF() )
 127                     {
 128                         bAsterisk = ('*'==cNextCh);
 129                         cNextCh = GetNextChar();
 130                     }
 131                 }
 132                 else
 133                 {
 134                     // '/'
 135                     bNextCh = false;
 136                     nRet = CSS1_SLASH;
 137                 }
 138             }
 139             break;
 140
 141         case '@': // '@import' | '@XXX'
 142             {
 143                 cNextCh = GetNextChar();
 144                 if (rtl::isAsciiAlpha(cNextCh))
 145                 {
 146                     // den naechsten Identifer scannen
 147                     OUStringBuffer sTmpBuffer(32);
 148                     do {
 149                         sTmpBuffer.append( cNextCh );
 150                         cNextCh = GetNextChar();
 151                     } while( (rtl::isAsciiAlphanumeric(cNextCh) ||
 152                              '-' == cNextCh) && !IsEOF() );
 153
 154                     aToken += sTmpBuffer.makeStringAndClear();
 155
 156                     // und schauen, ob wir ihn kennen
 157                     switch( aToken[0] )
 158                     {
 159                     case 'i':
 160                     case 'I':
 161                         if( aToken.equalsIgnoreAsciiCase( "import" ) )
 162                             nRet = CSS1_IMPORT_SYM;
 163                         break;
 164                     case 'p':
 165                     case 'P':
 166                         if( aToken.equalsIgnoreAsciiCase( "page" ) )
 167                             nRet = CSS1_PAGE_SYM;
 168                         break;
 169                     }
 170
 171                     // Fehlerbehandlung: '@ident' und alles bis
 172                     // zu einem Semikolon der dem Ende des folgenden
 173                     // Blocks ignorieren
 174                     if( CSS1_NULL==nRet )
 175                     {
 176                         aToken.clear();
 177                         int nBlockLvl = 0;
 178                         sal_Unicode cQuoteCh = 0;
 179                         bool bDone = false, bEscape = false;
 180                         while( !bDone && !IsEOF() )
 181                         {
 182                             bool bOldEscape = bEscape;
 183                             bEscape = false;
 184                             switch( cNextCh )
 185                             {
 186                             case '{':
 187                                 if( !cQuoteCh && !bOldEscape )
 188                                     nBlockLvl++;
 189                                 break;
 190                             case ';':
 191                                 if( !cQuoteCh && !bOldEscape )
 192                                     bDone = nBlockLvl==0;
 193                                 break;
 194                             case '}':
 195                                 if( !cQuoteCh && !bOldEscape )
 196                                     bDone = --nBlockLvl==0;
 197                                 break;
 198                             case '\"':
 199                             case '\'':
 200                                 if( !bOldEscape )
 201                                 {
 202                                     if( cQuoteCh )
 203                                     {
 204                                         if( cQuoteCh == cNextCh )
 205                                             cQuoteCh = 0;
 206                                     }
 207                                     else
 208                                     {
 209                                         cQuoteCh = cNextCh;
 210                                     }
 211                                 }
 212                                 break;
 213                             case '\\':
 214                                 if( !bOldEscape )
 215                                     bEscape = true;
 216                                 break;
 217                             }
 218                             cNextCh = GetNextChar();
 219                         }
 220                     }
 221
 222                     bNextCh = false;
 223                 }
 224             }
 225             break;
 226
 227         case '!': // '!' 'legal' | '!' 'important' | syntax error
 228             {
 229                 // White Space ueberlesen
 230                 cNextCh = GetNextChar();
 231                 while( ( ' ' == cNextCh ||
 232                        (cNextCh >= 0x09 && cNextCh <= 0x0d) ) && !IsEOF() )
 233                 {
 234                     bWhiteSpace = true;
 235                     cNextCh = GetNextChar();
 236                 }
 237
 238                 if( 'i'==cNextCh || 'I'==cNextCh)
 239                 {
 240                     // den naechsten Identifer scannen
 241                     OUStringBuffer sTmpBuffer(32);
 242                     do {
 243                         sTmpBuffer.append( cNextCh );
 244                         cNextCh = GetNextChar();
 245                     } while( (rtl::isAsciiAlphanumeric(cNextCh) ||
 246                              '-' == cNextCh) && !IsEOF() );
 247
 248                     aToken += sTmpBuffer.makeStringAndClear();
 249
 250                     if( ( 'i'==aToken[0] || 'I'==aToken[0] ) &&
 251                         aToken.equalsIgnoreAsciiCase( "important" ) )
 252                     {
 253                         // '!' 'important'
 254                         nRet = CSS1_IMPORTANT_SYM;
 255                     }
 256                     else
 257                     {
 258                         // Fehlerbehandlung: '!' ignorieren, IDENT nicht
 259                         nRet = CSS1_IDENT;
 260                     }
 261
 262                     bWhiteSpace = false;
 263                     bNextCh = false;
 264                 }
 265                 else
 266                 {
 267                     // Fehlerbehandlung: '!' ignorieren
 268                     bNextCh = false;
 269                 }
 270             }
 271             break;
 272
 273         case '\"':
 274         case '\'': // STRING
 275             {
 276                 // \... geht noch nicht!!!
 277                 sal_Unicode cQuoteChar = cNextCh;
 278                 cNextCh = GetNextChar();
 279
 280                 OUStringBuffer sTmpBuffer( MAX_LEN );
 281                 do {
 282                     sTmpBuffer.append( cNextCh );
 283                     cNextCh = GetNextChar();
 284                 } while( cQuoteChar != cNextCh && !IsEOF() );
 285
 286                 aToken += sTmpBuffer.toString();
 287
 288                 nRet = CSS1_STRING;
 289             }
 290             break;
 291
 292         case '0':
 293         case '1':
 294         case '2':
 295         case '3':
 296         case '4':
 297         case '5':
 298         case '6':
 299         case '7':
 300         case '8':
 301         case '9': // NUMBER | PERCENTAGE | LENGTH
 302             {
 303                 // die aktuelle Position retten
 304                 std::size_t nInPosSave = nInPos;
 305                 sal_Unicode cNextChSave = cNextCh;
 306                 sal_uInt32 nlLineNrSave = nlLineNr;
 307                 sal_uInt32 nlLinePosSave = nlLinePos;
 308                 bool bEOFSave = bEOF;
 309
 310                 // erstmal versuchen eine Hex-Zahl zu scannen
 311                 OUStringBuffer sTmpBuffer( 16 );
 312                 do {
 313                     sTmpBuffer.append( cNextCh );
 314                     cNextCh = GetNextChar();
 315                 } while( sTmpBuffer.getLength() < 7 &&
 316                          ( ('0'<=cNextCh && '9'>=cNextCh) ||
 317                            ('A'<=cNextCh && 'F'>=cNextCh) ||
 318                            ('a'<=cNextCh && 'f'>=cNextCh) ) &&
 319                          !IsEOF() );
 320
 321                 if( sTmpBuffer.getLength()==6 )
 322                 {
 323                     // wir haben eine hexadezimale Farbe gefunden
 324                     aToken += sTmpBuffer.makeStringAndClear();
 325                     nRet = CSS1_HEXCOLOR;
 326                     bNextCh = false;
 327
 328                     break;
 329                 }
 330
 331                 // sonst versuchen wir es mit einer Zahl
 332                 nInPos = nInPosSave;
 333                 cNextCh = cNextChSave;
 334                 nlLineNr = nlLineNrSave;
 335                 nlLinePos = nlLinePosSave;
 336                 bEOF = bEOFSave;
 337
 338                 // erstmal die Zahl scannen
 339                 sTmpBuffer.setLength( 0L );
 340                 do {
 341                     sTmpBuffer.append( cNextCh );
 342                     cNextCh = GetNextChar();
 343                 } while( (('0'<=cNextCh && '9'>=cNextCh) || '.'==cNextCh) &&
 344                          !IsEOF() );
 345
 346                 aToken += sTmpBuffer.makeStringAndClear();
 347                 nValue = aToken.toDouble();
 348
 349                 // White Space ueberlesen
 350                 while( ( ' ' == cNextCh ||
 351                        (cNextCh >= 0x09 && cNextCh <= 0x0d) ) && !IsEOF() )
 352                 {
 353                     bWhiteSpace = true;
 354                     cNextCh = GetNextChar();
 355                 }
 356
 357                 // und nun Schauen, ob es eine Einheit gibt
 358                 switch( cNextCh )
 359                 {
 360                 case '%': // PERCENTAGE
 361                     bWhiteSpace = false;
 362                     nRet = CSS1_PERCENTAGE;
 363                     break;
 364
 365                 case 'c':
 366                 case 'C': // LENGTH cm | LENGTH IDENT
 367                 case 'e':
 368                 case 'E': // LENGTH (em | ex) | LENGTH IDENT
 369                 case 'i':
 370                 case 'I': // LENGTH inch | LENGTH IDENT
 371                 case 'p':
 372                 case 'P': // LENGTH (pt | px | pc) | LENGTH IDENT
 373                 case 'm':
 374                 case 'M': // LENGTH mm | LENGTH IDENT
 375                     {
 376                         // die aktuelle Position retten
 377                         sal_Int32 nInPosOld = nInPos;
 378                         sal_Unicode cNextChOld = cNextCh;
 379                         sal_uLong nlLineNrOld  = nlLineNr;
 380                         sal_uLong nlLinePosOld = nlLinePos;
 381                         bool bEOFOld = bEOF;
 382
 383                         // den naechsten Identifer scannen
 384                         OUString aIdent;
 385                         OUStringBuffer sTmpBuffer2(64);
 386                         do {
 387                             sTmpBuffer2.append( cNextCh );
 388                             cNextCh = GetNextChar();
 389                         } while( (rtl::isAsciiAlphanumeric(cNextCh) ||
 390                                  '-' == cNextCh) && !IsEOF() );
 391
 392                         aIdent += sTmpBuffer2.makeStringAndClear();
 393
 394                         // Ist es eine Einheit?
 395                         const sal_Char *pCmp1 = nullptr, *pCmp2 = nullptr, *pCmp3 = nullptr;
 396                         double nScale1 = 1., nScale2 = 1.;
 397                         CSS1Token nToken1 = CSS1_LENGTH,
 398                                   nToken2 = CSS1_LENGTH,
 399                                   nToken3 = CSS1_LENGTH;
 400                         switch( aIdent[0] )
 401                         {
 402                         case 'c':
 403                         case 'C':
 404                             pCmp1 = "cm";
 405                             nScale1 = (72.*20.)/2.54; // twip
 406                             break;
 407                         case 'e':
 408                         case 'E':
 409                             pCmp1 = "em";
 410                             nToken1 = CSS1_EMS;
 411
 412                             pCmp2 = "ex";
 413                             nToken2 = CSS1_EMX;
 414                             break;
 415                         case 'i':
 416                         case 'I':
 417                             pCmp1 = "in";
 418                             nScale1 = 72.*20.; // twip
 419                             break;
 420                         case 'm':
 421                         case 'M':
 422                             pCmp1 = "mm";
 423                             nScale1 = (72.*20.)/25.4; // twip
 424                             break;
 425                         case 'p':
 426                         case 'P':
 427                             pCmp1 = "pt";
 428                             nScale1 = 20.; // twip
 429
 430                             pCmp2 = "pc";
 431                             nScale2 = 12.*20.; // twip
 432
 433                             pCmp3 = "px";
 434                             nToken3 = CSS1_PIXLENGTH;
 435                             break;
 436                         }
 437
 438                         double nScale = 0.0;
 439                         OSL_ENSURE( pCmp1, "Wo kommt das erste Zeichen her?" );
 440                         if( aIdent.equalsIgnoreAsciiCaseAscii( pCmp1 ) )
 441                         {
 442                             nScale = nScale1;
 443                             nRet = nToken1;
 444                         }
 445                         else if( pCmp2 &&
 446                                  aIdent.equalsIgnoreAsciiCaseAscii( pCmp2 ) )
 447                         {
 448                             nScale = nScale2;
 449                             nRet = nToken2;
 450                         }
 451                         else if( pCmp3 &&
 452                                  aIdent.equalsIgnoreAsciiCaseAscii( pCmp3 ) )
 453                         {
 454                             nScale =  1.; // nScale3
 455                             nRet = nToken3;
 456                         }
 457                         else
 458                         {
 459                             nRet = CSS1_NUMBER;
 460                         }
 461
 462                         if( CSS1_LENGTH==nRet && nScale!=1.0 )
 463                             nValue *= nScale;
 464
 465                         if( nRet == CSS1_NUMBER )
 466                         {
 467                             nInPos = nInPosOld;
 468                             cNextCh = cNextChOld;
 469                             nlLineNr = nlLineNrOld;
 470                             nlLinePos = nlLinePosOld;
 471                             bEOF = bEOFOld;
 472                         }
 473                         else
 474                         {
 475                             bWhiteSpace = false;
 476                         }
 477                         bNextCh = false;
 478                     }
 479                     break;
 480                 default: // NUMBER IDENT
 481                     bNextCh = false;
 482                     nRet = CSS1_NUMBER;
 483                     break;
 484                 }
 485             }
 486             break;
 487
 488         case ':': // ':'
 489             // link/visited/active abfangen !!!
 490             nRet = CSS1_COLON;
 491             break;
 492
 493         case '.': // DOT_W_WS | DOT_WO_WS
 494             nRet = bPrevWhiteSpace ? CSS1_DOT_W_WS : CSS1_DOT_WO_WS;
 495             break;
 496
 497         case '+': // '+'
 498             nRet = CSS1_PLUS;
 499             break;
 500
 501         case '-': // '-'
 502             nRet = CSS1_MINUS;
 503             break;
 504
 505         case '{': // '{'
 506             nRet = CSS1_OBRACE;
 507             break;
 508
 509         case '}': // '}'
 510             nRet = CSS1_CBRACE;
 511             break;
 512
 513         case ';': // ';'
 514             nRet = CSS1_SEMICOLON;
 515             break;
 516
 517         case ',': // ','
 518             nRet = CSS1_COMMA;
 519             break;
 520
 521         case '#': // '#'
 522             cNextCh = GetNextChar();
 523             if( ('0'<=cNextCh && '9'>=cNextCh) ||
 524                 ('a'<=cNextCh && 'f'>=cNextCh) ||
 525                 ('A'<=cNextCh && 'F'>=cNextCh) )
 526             {
 527                 // die aktuelle Position retten
 528                 sal_Int32 nInPosSave = nInPos;
 529                 sal_Unicode cNextChSave = cNextCh;
 530                 sal_uLong nlLineNrSave = nlLineNr;
 531                 sal_uLong nlLinePosSave = nlLinePos;
 532                 bool bEOFSave = bEOF;
 533
 534                 // erstmal versuchen eine Hex-Zahl zu scannen
 535                 OUStringBuffer sTmpBuffer(6);
 536                 do {
 537                     sTmpBuffer.append( cNextCh );
 538                     cNextCh = GetNextChar();
 539                 } while( sTmpBuffer.getLength() < 7 &&
 540                          ( ('0'<=cNextCh && '9'>=cNextCh) ||
 541                            ('A'<=cNextCh && 'F'>=cNextCh) ||
 542                            ('a'<=cNextCh && 'f'>=cNextCh) ) &&
 543                          !IsEOF() );
 544
 545                 if( sTmpBuffer.getLength()==6 || sTmpBuffer.getLength()==3 )
 546                 {
 547                     // wir haben eine hexadezimale Farbe gefunden
 548                     aToken += sTmpBuffer.makeStringAndClear();
 549                     nRet = CSS1_HEXCOLOR;
 550                     bNextCh = false;
 551
 552                     break;
 553                 }
 554
 555                 // sonst versuchen wir es mit einer Zahl
 556                 nInPos = nInPosSave;
 557                 cNextCh = cNextChSave;
 558                 nlLineNr = nlLineNrSave;
 559                 nlLinePos = nlLinePosSave;
 560                 bEOF = bEOFSave;
 561             }
 562
 563             nRet = CSS1_HASH;
 564             bNextCh = false;
 565             break;
 566
 567         case ' ':
 568         case '\t':
 569         case '\r':
 570         case '\n': // White-Space
 571             bWhiteSpace = true;
 572             break;
 573
 574         case (sal_Unicode)EOF:
 575             if( IsEOF() )
 576             {
 577                 eState = CSS1_PAR_ACCEPTED;
 578                 bNextCh = false;
 579                 break;
 580             }
 581             SAL_FALLTHROUGH;
 582
 583         default: // IDENT | syntax error
 584             if (rtl::isAsciiAlpha(cNextCh))
 585             {
 586                 // IDENT
 587
 588                 bool bHexColor = true;
 589
 590                 // den naechsten Identifer scannen
 591                 OUStringBuffer sTmpBuffer(64);
 592                 do {
 593                     sTmpBuffer.append( cNextCh );
 594                     if( bHexColor )
 595                     {
 596                         bHexColor =  sTmpBuffer.getLength()<7 &&
 597                                      ( ('0'<=cNextCh && '9'>=cNextCh) ||
 598                                        ('A'<=cNextCh && 'F'>=cNextCh) ||
 599                                        ('a'<=cNextCh && 'f'>=cNextCh) );
 600                     }
 601                     cNextCh = GetNextChar();
 602                 } while( (rtl::isAsciiAlphanumeric(cNextCh) ||
 603                            '-' == cNextCh) && !IsEOF() );
 604
 605                 aToken += sTmpBuffer.makeStringAndClear();
 606
 607                 if( bHexColor && sTmpBuffer.getLength()==6 )
 608                 {
 609                     bNextCh = false;
 610                     nRet = CSS1_HEXCOLOR;
 611
 612                     break;
 613                 }
 614                 if( '('==cNextCh &&
 615                     ( (('u'==aToken[0] || 'U'==aToken[0]) &&
 616                        aToken.equalsIgnoreAsciiCase( "url" )) ||
 617                       (('r'==aToken[0] || 'R'==aToken[0]) &&
 618                        aToken.equalsIgnoreAsciiCase( "rgb" )) ) )
 619                 {
 620                     int nNestCnt = 0;
 621                     OUStringBuffer sTmpBuffer2(64);
 622                     do {
 623                         sTmpBuffer2.append( cNextCh );
 624                         switch( cNextCh )
 625                         {
 626                         case '(':   nNestCnt++; break;
 627                         case ')':   nNestCnt--; break;
 628                         }
 629                         cNextCh = GetNextChar();
 630                     } while( (nNestCnt>1 || ')'!=cNextCh) && !IsEOF() );
 631                     sTmpBuffer2.append( cNextCh );
 632                     aToken += sTmpBuffer2.makeStringAndClear();
 633                     bNextCh = true;
 634                     nRet = 'u'==aToken[0] || 'U'==aToken[0]
 635                                 ? CSS1_URL
 636                                 : CSS1_RGB;
 637                 }
 638                 else
 639                 {
 640                     bNextCh = false;
 641                     nRet = CSS1_IDENT;
 642                 }
 643             }
 644             // Fehlerbehandlung: Zeichen ignorieren
 645             break;
 646         }
 647         if( bNextCh )
 648             cNextCh = GetNextChar();
 649
 650     } while( CSS1_NULL==nRet && IsParserWorking() );
 651
 652     return nRet;
 653 }
 654
// The following functions implement the parser for CSS1 described in

//      http://www.w3.org/pub/WWW/TR/WD-css1.html
// resp. http://www.w3.org/pub/WWW/TR/WD-css1-960220.html

// They are a direct implementation of the grammar
// described there.
 662
 663 // stylesheet
 664 //  : import* rule*
 665
 666 // import
 667 //  : IMPORT_SYM url
 668
 669 // url
 670 //  : STRING
 671
 672 void CSS1Parser::ParseStyleSheet()
 673 {
 674     LOOP_CHECK_DECL
 675
 676     // import*
 677     bool bDone = false;
 678     while( !bDone && IsParserWorking() )
 679     {
 680         LOOP_CHECK_CHECK( "Endlos-Schleife in ParseStyleSheet()/import *" )
 681
 682         switch( nToken )
 683         {
 684         case CSS1_IMPORT_SYM:
 685             // IMPORT_SYM url
 686             // url ueberspringen wir ungeprueft
 687             nToken = GetNextToken();
 688             break;
 689         case CSS1_IDENT:            // Look-Aheads
 690         case CSS1_DOT_W_WS:
 691         case CSS1_HASH:
 692         case CSS1_PAGE_SYM:
 693             // rule
 694             bDone = true;
 695             break;
 696         default:
 697             // Fehlerbehandlung: ueberlesen
 698             break;
 699         }
 700
 701         if( !bDone )
 702             nToken = GetNextToken();
 703     }
 704
 705     LOOP_CHECK_RESTART
 706
 707     // rule *
 708     while( IsParserWorking() )
 709     {
 710         LOOP_CHECK_CHECK( "Endlos-Schleife in ParseStyleSheet()/rule *" )
 711
 712         switch( nToken )
 713         {
 714         case CSS1_IDENT:        // Look-Aheads
 715         case CSS1_DOT_W_WS:
 716         case CSS1_HASH:
 717         case CSS1_PAGE_SYM:
 718             // rule
 719             ParseRule();
 720             break;
 721         default:
 722             // Fehlerbehandlung: ueberlesen
 723             nToken = GetNextToken();
 724             break;
 725         }
 726     }
 727 }
 728
 729 // rule
 730 //  : selector [ ',' selector ]*
 731 //    '{' declaration [ ';' declaration ]* '}'
 732
 733 void CSS1Parser::ParseRule()
 734 {
 735     // selector
 736     CSS1Selector *pSelector = ParseSelector();
 737     if( !pSelector )
 738         return;
 739
 740     // Selektor verarbeiten
 741     if( SelectorParsed( pSelector, true ) )
 742         delete pSelector;
 743
 744     LOOP_CHECK_DECL
 745
 746     // [ ',' selector ]*
 747     while( CSS1_COMMA==nToken && IsParserWorking() )
 748     {
 749         LOOP_CHECK_CHECK( "Endlos-Schleife in ParseRule()/selector *" )
 750
 751         // ',' ueberelesen
 752         nToken = GetNextToken();
 753
 754         // selector
 755         pSelector = ParseSelector();
 756         if( !pSelector )
 757             return;
 758
 759         // Selektor verarbeiten
 760         if( SelectorParsed( pSelector, false ) )
 761             delete pSelector;
 762     }
 763
 764     // '{'
 765     if( CSS1_OBRACE != nToken )
 766         return;
 767     nToken = GetNextToken();
 768
 769     // declaration
 770     OUString aProperty;
 771     CSS1Expression *pExpr = ParseDeclaration( aProperty );
 772     if( !pExpr )
 773         return;
 774
 775     // expression verarbeiten
 776     if( DeclarationParsed( aProperty, pExpr ) )
 777         delete pExpr;
 778
 779     LOOP_CHECK_RESTART
 780
 781     // [ ';' declaration ]*
 782     while( CSS1_SEMICOLON==nToken && IsParserWorking() )
 783     {
 784         LOOP_CHECK_CHECK( "Endlos-Schleife in ParseRule()/declaration *" )
 785
 786         // ';'
 787         nToken = GetNextToken();
 788
 789         // declaration
 790         if( CSS1_IDENT == nToken )
 791         {
 792             CSS1Expression *pExp = ParseDeclaration( aProperty );
 793             if( pExp )
 794             {
 795                 // expression verarbeiten
 796                 if( DeclarationParsed( aProperty, pExp ) )
 797                     delete pExp;
 798             }
 799         }
 800     }
 801
 802     // '}'
 803     if( CSS1_CBRACE == nToken )
 804         nToken = GetNextToken();
 805 }
 806
 807 // selector
 808 //  : simple_selector+ [ ':' pseudo_element ]?
 809
 810 // simple_selector
 811 //  : element_name [ DOT_WO_WS class ]?
 812 //  | DOT_W_WS class
 813 //  | id_selector
 814
 815 // element_name
 816 //  : IDENT
 817
 818 // class
 819 //  : IDENT
 820
 821 // id_selector
 822 //  : '#' IDENT
 823
// pseudo_element
 825 //  : IDENT
 826
 827 CSS1Selector *CSS1Parser::ParseSelector()
 828 {
 829     CSS1Selector *pRoot = nullptr, *pLast = nullptr;
 830
 831     bool bDone = false;
 832     CSS1Selector *pNew = nullptr;
 833
 834     LOOP_CHECK_DECL
 835
 836     // simple_selector+
 837     while( !bDone && IsParserWorking() )
 838     {
 839         LOOP_CHECK_CHECK( "Endlos-Schleife in ParseSelector()" )
 840
 841         bool bNextToken = true;
 842
 843         switch( nToken )
 844         {
 845         case CSS1_IDENT:
 846             {
 847                 // element_name [ DOT_WO_WS class ]?
 848
 849                 // element_name
 850                 OUString aElement = aToken;
 851                 CSS1SelectorType eType = CSS1_SELTYPE_ELEMENT;
 852                 nToken = GetNextToken();
 853
 854                 if( CSS1_DOT_WO_WS == nToken )
 855                 {
 856                     // DOT_WO_WS
 857                     nToken = GetNextToken();
 858
 859                     // class
 860                     if( CSS1_IDENT == nToken )
 861                     {
 862                         aElement += "." + aToken;
 863                         eType = CSS1_SELTYPE_ELEM_CLASS;
 864                     }
 865                     else
 866                     {
 867                         // class fehlt
 868                         return pRoot;
 869                     }
 870                 }
 871                 else
 872                 {
 873                     // das war jetzt ein Look-Ahead
 874                     bNextToken = false;
 875                 }
 876                 pNew = new CSS1Selector( eType, aElement );
 877             }
 878             break;
 879         case CSS1_DOT_W_WS:
 880             // DOT_W_WS class
 881
 882             // DOT_W_WS
 883             nToken = GetNextToken();
 884
 885             if( CSS1_IDENT==nToken )
 886             {
 887                 // class
 888                 pNew = new CSS1Selector( CSS1_SELTYPE_CLASS, aToken );
 889             }
 890             else
 891             {
 892                 // class fehlt
 893                 return pRoot;
 894             }
 895             break;
 896         case CSS1_HASH:
 897             // '#' id_selector
 898
 899             // '#'
 900             nToken = GetNextToken();
 901
 902             if( CSS1_IDENT==nToken )
 903             {
 904                 // id_selector
 905                 pNew = new CSS1Selector( CSS1_SELTYPE_ID, aToken );
 906             }
 907             else
 908             {
 909                 // id_selector fehlt
 910                 return pRoot;
 911             }
 912             break;
 913
 914         case CSS1_PAGE_SYM:
 915             {
 916                 //  @page
 917                 pNew = new CSS1Selector( CSS1_SELTYPE_PAGE, aToken );
 918             }
 919             break;
 920
 921         default:
 922             // wir wissen nicht was kommt, also aufhoehren
 923             bDone = true;
 924             break;
 925         }
 926
 927         // falls ein Selektor angelegt wurd, ihn speichern
 928         if( pNew )
 929         {
 930             OSL_ENSURE( (pRoot!=nullptr) == (pLast!=nullptr),
 931                     "Root-Selektor, aber kein Last" );
 932             if( pLast )
 933                 pLast->SetNext( pNew );
 934             else
 935                 pRoot = pNew;
 936
 937             pLast = pNew;
 938             pNew = nullptr;
 939         }
 940
 941         if( bNextToken && !bDone )
 942             nToken = GetNextToken();
 943     }
 944
 945     if( !pRoot )
 946     {
 947         // simple_selector fehlt
 948         return pRoot;
 949     }
 950
 951     // [ ':' pseudo_element ]?
 952     if( CSS1_COLON==nToken && IsParserWorking() )
 953     {
 954         // ':' pseudo element
 955         nToken = GetNextToken();
 956         if( CSS1_IDENT==nToken )
 957         {
 958             pLast->SetNext( new CSS1Selector(CSS1_SELTYPE_PSEUDO,aToken) );
 959             nToken = GetNextToken();
 960         }
 961         else
 962         {
 963             // pseudo_element fehlt
 964             return pRoot;
 965         }
 966     }
 967
 968     return pRoot;
 969 }
 970
 971 // declaration
 972 //  : property ':' expr prio?
 973 //  | /* empty */
 974
 975 // expression
 976 //  : term [ operator term ]*
 977
 978 // term
 979 //  : unary_operator?
 980 //     [ NUMBER | STRING | PERCENTAGE | LENGTH | EMS | EXS | IDENT |
 981 //       HEXCOLOR | URL | RGB ]
 982
 983 // operator
 984 //  : '/' | ',' | /* empty */
 985
 986 // unary_operator
 987 //  : '-' | '+'
 988
 989 // property
 990 //  : ident
 991
// The sign is only taken into account for numeric values (except
// PERCENTAGE) and is applied to nValue!
 994 CSS1Expression *CSS1Parser::ParseDeclaration( OUString& rProperty )
 995 {
 996     CSS1Expression *pRoot = nullptr, *pLast = nullptr;
 997
 998     // property
 999     if( CSS1_IDENT != nToken )
1000     {
1001         // property fehlt
1002         return pRoot;
1003     }
1004     rProperty = aToken;
1005
1006     nToken = GetNextToken();
1007
1008     // ':'
1009     if( CSS1_COLON != nToken )
1010     {
1011         // ':' fehlt
1012         return pRoot;
1013     }
1014     nToken = GetNextToken();
1015
1016     // term [operator term]*
1017     // hier sind wir sehr lax, was die Syntax angeht, sollte aber kein
1018     // Problem sein
1019     bool bDone = false;
1020     sal_Unicode cSign = 0, cOp = 0;
1021     CSS1Expression *pNew = nullptr;
1022
1023     LOOP_CHECK_DECL
1024
1025     while( !bDone && IsParserWorking() )
1026     {
1027         LOOP_CHECK_CHECK( "Endlos-Schleife in ParseDeclaration()" )
1028
1029         switch( nToken )
1030         {
1031         case CSS1_MINUS:
1032             cSign = '-';
1033             break;
1034
1035         case CSS1_PLUS:
1036             cSign = '+';
1037             break;
1038
1039         case CSS1_NUMBER:
1040         case CSS1_LENGTH:
1041         case CSS1_PIXLENGTH:
1042         case CSS1_EMS:
1043         case CSS1_EMX:
1044             if( '-'==cSign )
1045                 nValue = -nValue;
1046             SAL_FALLTHROUGH;
1047         case CSS1_STRING:
1048         case CSS1_PERCENTAGE:
1049         case CSS1_IDENT:
1050         case CSS1_URL:
1051         case CSS1_RGB:
1052         case CSS1_HEXCOLOR:
1053             pNew = new CSS1Expression( nToken, aToken, nValue, cOp );
1054             nValue = 0; // sonst landet das auch im naechsten Ident
1055             cSign = 0;
1056             cOp = 0;
1057             break;
1058
1059         case CSS1_SLASH:
1060             cOp = '/';
1061             cSign = 0;
1062             break;
1063
1064         case CSS1_COMMA:
1065             cOp = ',';
1066             cSign = 0;
1067             break;
1068
1069         default:
1070             bDone = true;
1071             break;
1072         }
1073
1074         // falls ein Expression angelegt wurde, diesen speichern
1075         if( pNew )
1076         {
1077             OSL_ENSURE( (pRoot!=nullptr) == (pLast!=nullptr),
1078                     "Root-Selektor, aber kein Last" );
1079             if( pLast )
1080                 pLast->SetNext( pNew );
1081             else
1082                 pRoot = pNew;
1083
1084             pLast = pNew;
1085             pNew = nullptr;
1086         }
1087
1088         if( !bDone )
1089             nToken = GetNextToken();
1090     }
1091
1092     if( !pRoot )
1093     {
1094         // term fehlt
1095         return pRoot;
1096     }
1097
1098     // prio?
1099     if( CSS1_IMPORTANT_SYM==nToken )
1100     {
1101         // IMPORTANT_SYM
1102         nToken = GetNextToken();
1103     }
1104
1105     return pRoot;
1106 }
1107
1108 CSS1Parser::CSS1Parser()
1109     : bWhiteSpace(false)
1110     , bEOF(false)
1111     , cNextCh(0)
1112     , nInPos(0)
1113     , nlLineNr(0)
1114     , nlLinePos(0)
1115     , nValue(0)
1116     , eState(CSS1_PAR_ACCEPTED)
1117     , nToken(CSS1_NULL)
1118 {
1119 }
1120
1121 CSS1Parser::~CSS1Parser()
1122 {
1123 }
1124
1125 bool CSS1Parser::ParseStyleSheet( const OUString& rIn )
1126 {
1127     OUString aTmp( rIn );
1128
1129     sal_Unicode c;
1130     while( !aTmp.isEmpty() &&
1131            ( ' '==(c=aTmp[0]) || '\t'==c || '\r'==c || '\n'==c ) )
1132         aTmp = aTmp.copy( 1, aTmp.getLength() - 1 );
1133
1134     while( !aTmp.isEmpty() && ( ' '==(c=aTmp[aTmp.getLength()-1])
1135            || '\t'==c || '\r'==c || '\n'==c ) )
1136         aTmp = aTmp.copy( 0, aTmp.getLength()-1 );
1137
1138     // SGML-Kommentare entfernen
1139     if( aTmp.getLength() >= 4 &&
1140         aTmp.startsWith( "<!--" ) )
1141         aTmp = aTmp.copy( 4, aTmp.getLength() - 4 );
1142
1143     if( aTmp.getLength() >=3 &&
1144         aTmp.endsWith("-->") )
1145         aTmp = aTmp.copy( 0, aTmp.getLength() - 3 );
1146
1147     if( aTmp.isEmpty() )
1148         return true;
1149
1150     InitRead( aTmp );
1151
1152     ParseStyleSheet();
1153
1154     return true;
1155 }
1156
1157 bool CSS1Parser::ParseStyleOption( const OUString& rIn )
1158 {
1159     if( rIn.isEmpty() )
1160         return true;
1161
1162     InitRead( rIn );
1163
1164     // fdo#41796: skip over spurious semicolons
1165     while (CSS1_SEMICOLON == nToken)
1166     {
1167         nToken = GetNextToken();
1168     }
1169
1170     OUString aProperty;
1171     CSS1Expression *pExpr = ParseDeclaration( aProperty );
1172     if( !pExpr )
1173     {
1174         return false;
1175     }
1176
1177     // expression verarbeiten
1178     if( DeclarationParsed( aProperty, pExpr ) )
1179         delete pExpr;
1180
1181     LOOP_CHECK_DECL
1182
1183     // [ ';' declaration ]*
1184     while( CSS1_SEMICOLON==nToken && IsParserWorking() )
1185     {
1186         LOOP_CHECK_CHECK( "Endlos-Schleife in ParseStyleOption()" )
1187
1188         nToken = GetNextToken();
1189         if( CSS1_IDENT==nToken )
1190         {
1191             CSS1Expression *pExp = ParseDeclaration( aProperty );
1192             if( pExp )
1193             {
1194                 // expression verarbeiten
1195                 if( DeclarationParsed( aProperty, pExp ) )
1196                     delete pExp;
1197             }
1198         }
1199     }
1200
1201     return true;
1202 }
1203
1204 bool CSS1Parser::SelectorParsed( CSS1Selector* /* pSelector */, bool /*bFirst*/ )
1205 {
1206     // Selektor loeschen
1207     return true;
1208 }
1209
1210 bool CSS1Parser::DeclarationParsed( const OUString& /*rProperty*/,
1211                                     const CSS1Expression * /* pExpr */ )
1212 {
1213     // Deklaration loeschen
1214     return true;
1215 }
1216
1217 CSS1Selector::~CSS1Selector()
1218 {
1219     delete pNext;
1220 }
1221
1222 CSS1Expression::~CSS1Expression()
1223 {
1224     delete pNext;
1225 }
1226
1227 void CSS1Expression::GetURL( OUString& rURL  ) const
1228 {
1229     OSL_ENSURE( CSS1_URL==eType, "CSS1-Ausruck ist keine Farbe URL" );
1230
1231     OSL_ENSURE( aValue.startsWithIgnoreAsciiCase( "url" ) &&
1232                 aValue.getLength() > 5 &&
1233                 '(' == aValue[3] &&
1234                 ')' == aValue[aValue.getLength()-1],
1235                 "keine gueltiges URL(...)" );
1236
1237     if( aValue.getLength() > 5 )
1238     {
1239         rURL = aValue.copy( 4, aValue.getLength() - 5 );
1240
1241         // tdf#94088 original stripped only spaces, but there may also be
1242         // double quotes in CSS style URLs, so be prepared to spaces followed
1243         // by a single quote followed by spaces
1244         const sal_Unicode aSpace(' ');
1245         const sal_Unicode aSingleQuote('\'');
1246
1247         rURL = comphelper::string::strip(rURL, aSpace);
1248         rURL = comphelper::string::strip(rURL, aSingleQuote);
1249         rURL = comphelper::string::strip(rURL, aSpace);
1250     }
1251 }
1252
1253 bool CSS1Expression::GetColor( Color &rColor ) const
1254 {
1255     OSL_ENSURE( CSS1_IDENT==eType || CSS1_RGB==eType ||
1256                 CSS1_HEXCOLOR==eType || CSS1_STRING==eType,
1257                 "CSS1-Ausruck kann keine Farbe sein" );
1258
1259     bool bRet = false;
1260     sal_uInt32 nColor = SAL_MAX_UINT32;
1261
1262     switch( eType )
1263     {
1264     case CSS1_RGB:
1265         {
1266             sal_uInt8 aColors[3] = { 0, 0, 0 };
1267
1268             if (!aValue.startsWithIgnoreAsciiCase( "rgb" ) || aValue.getLength() < 6 ||
1269                     aValue[3] != '(' || aValue[aValue.getLength()-1] != ')')
1270             {
1271                 break;
1272             }
1273
1274             sal_Int32 nPos = 4; // start after "rgb("
1275             for ( int nCol = 0; nCol < 3 && nPos > 0; ++nCol )
1276             {
1277                 const OUString aNumber = aValue.getToken(0, ',', nPos);
1278
1279                 sal_Int32 nNumber = aNumber.toInt32();
1280                 if( nNumber<0 )
1281                 {
1282                     nNumber = 0;
1283                 }
1284                 else if( aNumber.indexOf('%') >= 0 )
1285                 {
1286                     if( nNumber > 100 )
1287                         nNumber = 100;
1288                     nNumber *= 255;
1289                     nNumber /= 100;
1290                 }
1291                 else if( nNumber > 255 )
1292                     nNumber = 255;
1293
1294                 aColors[nCol] = static_cast<sal_uInt8>(nNumber);
1295             }
1296
1297             rColor.SetRed( aColors[0] );
1298             rColor.SetGreen( aColors[1] );
1299             rColor.SetBlue( aColors[2] );
1300
1301             bRet = true;    // etwas anderes als eine Farbe kann es nicht sein
1302         }
1303         break;
1304
1305     case CSS1_IDENT:
1306     case CSS1_STRING:
1307         {
1308             OUString aTmp( aValue.toAsciiUpperCase() );
1309             nColor = GetHTMLColor( aTmp );
1310             bRet = nColor != SAL_MAX_UINT32;
1311         }
1312         if( bRet || CSS1_STRING != eType || aValue.isEmpty() ||
1313             aValue[0] != '#' )
1314             break;
1315         SAL_FALLTHROUGH;
1316     case CSS1_HEXCOLOR:
1317         {
1318             // HACK fuer MS-IE: DIe Farbe kann auch in einem String stehen
1319             sal_Int32 nOffset = CSS1_STRING==eType ? 1 : 0;
1320             bool bDouble = aValue.getLength()-nOffset == 3;
1321             sal_Int32 i = nOffset, nEnd = (bDouble ? 3 : 6) + nOffset;
1322
1323             nColor = 0;
1324             for( ; i<nEnd; i++ )
1325             {
1326                 sal_Unicode c = (i<aValue.getLength() ? aValue[i]
1327                                                          : '0' );
1328                 if( c >= '0' && c <= '9' )
1329                     c -= 48;
1330                 else if( c >= 'A' && c <= 'F' )
1331                     c -= 55;
1332                 else if( c >= 'a' && c <= 'f' )
1333                     c -= 87;
1334                 else
1335                     c = 16;
1336
1337                 nColor *= 16;
1338                 if( c<16 )
1339                     nColor += c;
1340                 if( bDouble )
1341                 {
1342                     nColor *= 16;
1343                     if( c<16 )
1344                         nColor += c;
1345                 }
1346             }
1347             bRet = true;
1348         }
1349         break;
1350     default:
1351         ;
1352     }
1353
1354     if( bRet && nColor!=SAL_MAX_UINT32 )
1355     {
1356         rColor.SetRed( (sal_uInt8)((nColor & 0x00ff0000UL) >> 16) );
1357         rColor.SetGreen( (sal_uInt8)((nColor & 0x0000ff00UL) >> 8) );
1358         rColor.SetBlue( (sal_uInt8)(nColor & 0x000000ffUL) );
1359     }
1360
1361     return bRet;
1362 }
1363
1364 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */