sw/source/filter/html/parcss1.cxx

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /*
   3  * This file is part of the LibreOffice project.
   4  *
   5  * This Source Code Form is subject to the terms of the Mozilla Public
   6  * License, v. 2.0. If a copy of the MPL was not distributed with this
   7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
   8  *
   9  * This file incorporates work covered by the following license notice:
  10  *
  11  *   Licensed to the Apache Software Foundation (ASF) under one or more
  12  *   contributor license agreements. See the NOTICE file distributed
  13  *   with this work for additional information regarding copyright
  14  *   ownership. The ASF licenses this file to you under the Apache
  15  *   License, Version 2.0 (the "License"); you may not use this file
  16  *   except in compliance with the License. You may obtain a copy of
  17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
  18  */
  19
  20 #include <o3tl/string_view.hxx>
  21 #include <osl/diagnose.h>
  22 #include <rtl/character.hxx>
  23 #include <rtl/ustrbuf.hxx>
  24 #include <tools/color.hxx>
  25 #include <tools/solar.h>
  26 #include <svtools/htmltokn.h>
  27 #include <comphelper/string.hxx>
  28 #include "parcss1.hxx"
  29
// Loop-Check: Used to avoid infinite loops. After every loop iteration it is
// checked whether the input position has made progress.
#define LOOP_CHECK

#ifdef LOOP_CHECK

// Declares the local variable that remembers the input position seen by the
// previous check. It starts at SAL_MAX_INT32, so the first check is not
// expected to match a real input position.
#define LOOP_CHECK_DECL \
    sal_Int32 nOldInPos = SAL_MAX_INT32;
// Resets the remembered position before a new loop in the same function.
#define LOOP_CHECK_RESTART \
    nOldInPos = SAL_MAX_INT32;
// To be placed at the top of a loop body: asserts (with message 'where') that
// the input position changed since the previous check or that the end of the
// input was reached; if neither holds it leaves the enclosing loop via
// 'break', otherwise it records the current position for the next check.
// NOTE: expands to several statements ending in an if/else, so it must be
// used as a statement of its own directly inside the loop body.
#define LOOP_CHECK_CHECK( where ) \
    OSL_ENSURE( nOldInPos!=m_nInPos || m_cNextCh==sal_Unicode(EOF), where );    \
    if( nOldInPos==m_nInPos && m_cNextCh!=sal_Unicode(EOF) )                    \
        break;                                                              \
    else                                                                    \
        nOldInPos = m_nInPos;

#else

// Loop checking disabled: all helper macros expand to nothing.
#define LOOP_CHECK_DECL
#define LOOP_CHECK_RESTART
#define LOOP_CHECK_CHECK( where )

#endif

// Initial capacity passed to the buffer that collects STRING tokens.
const sal_Int32 MAX_LEN = 1024;
  56
  57 void CSS1Parser::InitRead( const OUString& rIn )
  58 {
  59     m_nlLineNr = 0;
  60     m_nlLinePos = 0;
  61
  62     m_bWhiteSpace = true; // if nothing was read it's like there was WS
  63     m_bEOF = false;
  64     m_eState = CSS1_PAR_WORKING;
  65     m_nValue = 0.;
  66
  67     m_aIn = rIn;
  68     m_nInPos = 0;
  69     m_cNextCh = GetNextChar();
  70     m_nToken = GetNextToken();
  71 }
  72
  73 sal_Unicode CSS1Parser::GetNextChar()
  74 {
  75     if( m_nInPos >= m_aIn.getLength() )
  76     {
  77         m_bEOF = true;
  78         return sal_Unicode(EOF);
  79     }
  80
  81     sal_Unicode c = m_aIn[m_nInPos];
  82     m_nInPos++;
  83
  84     if( c == '\n' )
  85     {
  86         ++m_nlLineNr;
  87         m_nlLinePos = 1;
  88     }
  89     else
  90         ++m_nlLinePos;
  91
  92     return c;
  93 }
  94
  95 // This function implements the scanner described in
  96
  97 //       http://www.w3.org/pub/WWW/TR/WD-css1.html
  98 // resp. http://www.w3.org/pub/WWW/TR/WD-css1-960220.html
  99
 100 // for CSS1. It's a direct implementation of the
 101 // described Lex grammar.
 102
 103 CSS1Token CSS1Parser::GetNextToken()
 104 {
 105     CSS1Token nRet = CSS1_NULL;
 106     m_aToken.clear();
 107
 108     do {
 109         // remember if white space was read
 110         bool bPrevWhiteSpace = m_bWhiteSpace;
 111         m_bWhiteSpace = false;
 112
 113         bool bNextCh = true;
 114         switch( m_cNextCh )
 115         {
 116         case '/': // COMMENT | '/'
 117             {
 118                 m_cNextCh = GetNextChar();
 119                 if( '*' == m_cNextCh )
 120                 {
 121                     // COMMENT
 122                     m_cNextCh = GetNextChar();
 123
 124                     bool bAsterisk = false;
 125                     while( !(bAsterisk && '/'==m_cNextCh) && !IsEOF() )
 126                     {
 127                         bAsterisk = ('*'==m_cNextCh);
 128                         m_cNextCh = GetNextChar();
 129                     }
 130                 }
 131                 else
 132                 {
 133                     // '/'
 134                     bNextCh = false;
 135                     nRet = CSS1_SLASH;
 136                 }
 137             }
 138             break;
 139
 140         case '@': // '@import' | '@XXX'
 141             {
 142                 m_cNextCh = GetNextChar();
 143                 if (rtl::isAsciiAlpha(m_cNextCh))
 144                 {
 145                     // scan the next identifier
 146                     OUStringBuffer sTmpBuffer(32);
 147                     do {
 148                         sTmpBuffer.append( m_cNextCh );
 149                         m_cNextCh = GetNextChar();
 150                     } while( (rtl::isAsciiAlphanumeric(m_cNextCh) ||
 151                              '-' == m_cNextCh) && !IsEOF() );
 152
 153                     m_aToken += sTmpBuffer;
 154
 155                     // check if we know it
 156                     switch( m_aToken[0] )
 157                     {
 158                     case 'i':
 159                     case 'I':
 160                         if( m_aToken.equalsIgnoreAsciiCase( "import" ) )
 161                             nRet = CSS1_IMPORT_SYM;
 162                         break;
 163                     case 'p':
 164                     case 'P':
 165                         if( m_aToken.equalsIgnoreAsciiCase( "page" ) )
 166                             nRet = CSS1_PAGE_SYM;
 167                         break;
 168                     }
 169
 170                     // error handling: ignore '@indent' and the rest until
 171                     // semicolon at end of the next block
 172                     if( CSS1_NULL==nRet )
 173                     {
 174                         m_aToken.clear();
 175                         int nBlockLvl = 0;
 176                         sal_Unicode cQuoteCh = 0;
 177                         bool bDone = false, bEscape = false;
 178                         while( !bDone && !IsEOF() )
 179                         {
 180                             bool bOldEscape = bEscape;
 181                             bEscape = false;
 182                             switch( m_cNextCh )
 183                             {
 184                             case '{':
 185                                 if( !cQuoteCh && !bOldEscape )
 186                                     nBlockLvl++;
 187                                 break;
 188                             case ';':
 189                                 if( !cQuoteCh && !bOldEscape )
 190                                     bDone = nBlockLvl==0;
 191                                 break;
 192                             case '}':
 193                                 if( !cQuoteCh && !bOldEscape )
 194                                     bDone = --nBlockLvl==0;
 195                                 break;
 196                             case '\"':
 197                             case '\'':
 198                                 if( !bOldEscape )
 199                                 {
 200                                     if( cQuoteCh )
 201                                     {
 202                                         if( cQuoteCh == m_cNextCh )
 203                                             cQuoteCh = 0;
 204                                     }
 205                                     else
 206                                     {
 207                                         cQuoteCh = m_cNextCh;
 208                                     }
 209                                 }
 210                                 break;
 211                             case '\\':
 212                                 if( !bOldEscape )
 213                                     bEscape = true;
 214                                 break;
 215                             }
 216                             m_cNextCh = GetNextChar();
 217                         }
 218                     }
 219
 220                     bNextCh = false;
 221                 }
 222             }
 223             break;
 224
 225         case '!': // '!' 'legal' | '!' 'important' | syntax error
 226             {
 227                 // ignore white space
 228                 m_cNextCh = GetNextChar();
 229                 while( ( ' ' == m_cNextCh ||
 230                        (m_cNextCh >= 0x09 && m_cNextCh <= 0x0d) ) && !IsEOF() )
 231                 {
 232                     m_bWhiteSpace = true;
 233                     m_cNextCh = GetNextChar();
 234                 }
 235
 236                 if( 'i'==m_cNextCh || 'I'==m_cNextCh)
 237                 {
 238                     // scan next identifier
 239                     OUStringBuffer sTmpBuffer(32);
 240                     do {
 241                         sTmpBuffer.append( m_cNextCh );
 242                         m_cNextCh = GetNextChar();
 243                     } while( (rtl::isAsciiAlphanumeric(m_cNextCh) ||
 244                              '-' == m_cNextCh) && !IsEOF() );
 245
 246                     m_aToken += sTmpBuffer;
 247
 248                     if( ( 'i'==m_aToken[0] || 'I'==m_aToken[0] ) &&
 249                         m_aToken.equalsIgnoreAsciiCase( "important" ) )
 250                     {
 251                         // '!' 'important'
 252                         nRet = CSS1_IMPORTANT_SYM;
 253                     }
 254                     else
 255                     {
 256                         // error handling: ignore '!', not IDENT
 257                         nRet = CSS1_IDENT;
 258                     }
 259
 260                     m_bWhiteSpace = false;
 261                     bNextCh = false;
 262                 }
 263                 else
 264                 {
 265                     // error handling: ignore '!'
 266                     bNextCh = false;
 267                 }
 268             }
 269             break;
 270
 271         case '\"':
 272         case '\'': // STRING
 273             {
 274                 // \... isn't possible yet!!!
 275                 sal_Unicode cQuoteChar = m_cNextCh;
 276                 m_cNextCh = GetNextChar();
 277
 278                 OUStringBuffer sTmpBuffer( MAX_LEN );
 279                 do {
 280                     sTmpBuffer.append( m_cNextCh );
 281                     m_cNextCh = GetNextChar();
 282                 } while( cQuoteChar != m_cNextCh && !IsEOF() );
 283
 284                 m_aToken += sTmpBuffer;
 285
 286                 nRet = CSS1_STRING;
 287             }
 288             break;
 289
 290         case '0':
 291         case '1':
 292         case '2':
 293         case '3':
 294         case '4':
 295         case '5':
 296         case '6':
 297         case '7':
 298         case '8':
 299         case '9': // NUMBER | PERCENTAGE | LENGTH
 300             {
 301                 // save current position
 302                 std::size_t nInPosSave = m_nInPos;
 303                 sal_Unicode cNextChSave = m_cNextCh;
 304                 sal_uInt32 nlLineNrSave = m_nlLineNr;
 305                 sal_uInt32 nlLinePosSave = m_nlLinePos;
 306                 bool bEOFSave = m_bEOF;
 307
 308                 // first try to parse a hex digit
 309                 OUStringBuffer sTmpBuffer( 16 );
 310                 do {
 311                     sTmpBuffer.append( m_cNextCh );
 312                     m_cNextCh = GetNextChar();
 313                 } while( sTmpBuffer.getLength() < 7 &&
 314                          ( ('0'<=m_cNextCh && '9'>=m_cNextCh) ||
 315                            ('A'<=m_cNextCh && 'F'>=m_cNextCh) ||
 316                            ('a'<=m_cNextCh && 'f'>=m_cNextCh) ) &&
 317                          !IsEOF() );
 318
 319                 if( sTmpBuffer.getLength()==6 )
 320                 {
 321                     // we found a color in hex
 322                     m_aToken += sTmpBuffer;
 323                     nRet = CSS1_HEXCOLOR;
 324                     bNextCh = false;
 325
 326                     break;
 327                 }
 328
 329                 // otherwise we try a number
 330                 m_nInPos = nInPosSave;
 331                 m_cNextCh = cNextChSave;
 332                 m_nlLineNr = nlLineNrSave;
 333                 m_nlLinePos = nlLinePosSave;
 334                 m_bEOF = bEOFSave;
 335
 336                 // first parse the number
 337                 sTmpBuffer.setLength( 0 );
 338                 do {
 339                     sTmpBuffer.append( m_cNextCh );
 340                     m_cNextCh = GetNextChar();
 341                 } while( (('0'<=m_cNextCh && '9'>=m_cNextCh) || '.'==m_cNextCh) &&
 342                          !IsEOF() );
 343
 344                 m_aToken += sTmpBuffer;
 345                 m_nValue = m_aToken.toDouble();
 346
 347                 // ignore white space
 348                 while( ( ' ' == m_cNextCh ||
 349                        (m_cNextCh >= 0x09 && m_cNextCh <= 0x0d) ) && !IsEOF() )
 350                 {
 351                     m_bWhiteSpace = true;
 352                     m_cNextCh = GetNextChar();
 353                 }
 354
 355                 // check now, of there is a unit
 356                 switch( m_cNextCh )
 357                 {
 358                 case '%': // PERCENTAGE
 359                     m_bWhiteSpace = false;
 360                     nRet = CSS1_PERCENTAGE;
 361                     break;
 362
 363                 case 'c':
 364                 case 'C': // LENGTH cm | LENGTH IDENT
 365                 case 'e':
 366                 case 'E': // LENGTH (em | ex) | LENGTH IDENT
 367                 case 'i':
 368                 case 'I': // LENGTH inch | LENGTH IDENT
 369                 case 'p':
 370                 case 'P': // LENGTH (pt | px | pc) | LENGTH IDENT
 371                 case 'm':
 372                 case 'M': // LENGTH mm | LENGTH IDENT
 373                     {
 374                         // save current position
 375                         sal_Int32 nInPosOld = m_nInPos;
 376                         sal_Unicode cNextChOld = m_cNextCh;
 377                         sal_uInt32 nlLineNrOld  = m_nlLineNr;
 378                         sal_uInt32 nlLinePosOld = m_nlLinePos;
 379                         bool bEOFOld = m_bEOF;
 380
 381                         // parse the next identifier
 382                         OUString aIdent;
 383                         OUStringBuffer sTmpBuffer2(64);
 384                         do {
 385                             sTmpBuffer2.append( m_cNextCh );
 386                             m_cNextCh = GetNextChar();
 387                         } while( (rtl::isAsciiAlphanumeric(m_cNextCh) ||
 388                                  '-' == m_cNextCh) && !IsEOF() );
 389
 390                         aIdent += sTmpBuffer2;
 391
 392                         // Is it a unit?
 393                         const char *pCmp1 = nullptr, *pCmp2 = nullptr, *pCmp3 = nullptr;
 394                         double nScale1 = 1., nScale2 = 1.;
 395                         CSS1Token nToken1 = CSS1_LENGTH,
 396                                   nToken2 = CSS1_LENGTH,
 397                                   nToken3 = CSS1_LENGTH;
 398                         switch( aIdent[0] )
 399                         {
 400                         case 'c':
 401                         case 'C':
 402                             pCmp1 = "cm";
 403                             nScale1 = (72.*20.)/2.54; // twip
 404                             break;
 405                         case 'e':
 406                         case 'E':
 407                             pCmp1 = "em";
 408                             nToken1 = CSS1_EMS;
 409
 410                             pCmp2 = "ex";
 411                             nToken2 = CSS1_EMX;
 412                             break;
 413                         case 'i':
 414                         case 'I':
 415                             pCmp1 = "in";
 416                             nScale1 = 72.*20.; // twip
 417                             break;
 418                         case 'm':
 419                         case 'M':
 420                             pCmp1 = "mm";
 421                             nScale1 = (72.*20.)/25.4; // twip
 422                             break;
 423                         case 'p':
 424                         case 'P':
 425                             pCmp1 = "pt";
 426                             nScale1 = 20.; // twip
 427
 428                             pCmp2 = "pc";
 429                             nScale2 = 12.*20.; // twip
 430
 431                             pCmp3 = "px";
 432                             nToken3 = CSS1_PIXLENGTH;
 433                             break;
 434                         }
 435
 436                         double nScale = 0.0;
 437                         OSL_ENSURE( pCmp1, "Where does the first digit come from?" );
 438                         if( aIdent.equalsIgnoreAsciiCaseAscii( pCmp1 ) )
 439                         {
 440                             nScale = nScale1;
 441                             nRet = nToken1;
 442                         }
 443                         else if( pCmp2 &&
 444                                  aIdent.equalsIgnoreAsciiCaseAscii( pCmp2 ) )
 445                         {
 446                             nScale = nScale2;
 447                             nRet = nToken2;
 448                         }
 449                         else if( pCmp3 &&
 450                                  aIdent.equalsIgnoreAsciiCaseAscii( pCmp3 ) )
 451                         {
 452                             nScale =  1.; // nScale3
 453                             nRet = nToken3;
 454                         }
 455                         else
 456                         {
 457                             nRet = CSS1_NUMBER;
 458                         }
 459
 460                         if( CSS1_LENGTH==nRet && nScale!=1.0 )
 461                             m_nValue *= nScale;
 462
 463                         if( nRet == CSS1_NUMBER )
 464                         {
 465                             m_nInPos = nInPosOld;
 466                             m_cNextCh = cNextChOld;
 467                             m_nlLineNr = nlLineNrOld;
 468                             m_nlLinePos = nlLinePosOld;
 469                             m_bEOF = bEOFOld;
 470                         }
 471                         else
 472                         {
 473                             m_bWhiteSpace = false;
 474                         }
 475                         bNextCh = false;
 476                     }
 477                     break;
 478                 default: // NUMBER IDENT
 479                     bNextCh = false;
 480                     nRet = CSS1_NUMBER;
 481                     break;
 482                 }
 483             }
 484             break;
 485
 486         case ':': // ':'
 487             // catch link/visited/active !!!
 488             nRet = CSS1_COLON;
 489             break;
 490
 491         case '.': // DOT_W_WS | DOT_WO_WS
 492             nRet = bPrevWhiteSpace ? CSS1_DOT_W_WS : CSS1_DOT_WO_WS;
 493             break;
 494
 495         case '+': // '+'
 496             nRet = CSS1_PLUS;
 497             break;
 498
 499         case '-': // '-'
 500             nRet = CSS1_MINUS;
 501             break;
 502
 503         case '{': // '{'
 504             nRet = CSS1_OBRACE;
 505             break;
 506
 507         case '}': // '}'
 508             nRet = CSS1_CBRACE;
 509             break;
 510
 511         case ';': // ';'
 512             nRet = CSS1_SEMICOLON;
 513             break;
 514
 515         case ',': // ','
 516             nRet = CSS1_COMMA;
 517             break;
 518
 519         case '#': // '#'
 520             m_cNextCh = GetNextChar();
 521             if( ('0'<=m_cNextCh && '9'>=m_cNextCh) ||
 522                 ('a'<=m_cNextCh && 'f'>=m_cNextCh) ||
 523                 ('A'<=m_cNextCh && 'F'>=m_cNextCh) )
 524             {
 525                 // save current position
 526                 sal_Int32 nInPosSave = m_nInPos;
 527                 sal_Unicode cNextChSave = m_cNextCh;
 528                 sal_uInt32 nlLineNrSave = m_nlLineNr;
 529                 sal_uInt32 nlLinePosSave = m_nlLinePos;
 530                 bool bEOFSave = m_bEOF;
 531
 532                 // first try to parse a hex digit
 533                 OUStringBuffer sTmpBuffer(8);
 534                 do {
 535                     sTmpBuffer.append( m_cNextCh );
 536                     m_cNextCh = GetNextChar();
 537                 } while( sTmpBuffer.getLength() < 9 &&
 538                          ( ('0'<=m_cNextCh && '9'>=m_cNextCh) ||
 539                            ('A'<=m_cNextCh && 'F'>=m_cNextCh) ||
 540                            ('a'<=m_cNextCh && 'f'>=m_cNextCh) ) &&
 541                          !IsEOF() );
 542
 543                 if( sTmpBuffer.getLength()==6 || sTmpBuffer.getLength()==3 )
 544                 {
 545                     // we found a color in hex (RGB)
 546                     m_aToken += sTmpBuffer;
 547                     nRet = CSS1_HEXCOLOR;
 548                     bNextCh = false;
 549
 550                     break;
 551                 }
 552
 553                 if( sTmpBuffer.getLength()==8 )
 554                 {
 555                     // we found a color in hex (RGBA)
 556                     // we convert it to RGB assuming white background
 557                     sal_uInt32 nColor = sTmpBuffer.makeStringAndClear().toUInt32(16);
 558                     sal_uInt32 nRed = (nColor & 0xff000000) >> 24;
 559                     sal_uInt32 nGreen = (nColor & 0xff0000) >> 16;
 560                     sal_uInt32 nBlue = (nColor & 0xff00) >> 8;
 561                     double nAlpha = (nColor & 0xff) / 255.0;
 562                     nRed = (1 - nAlpha) * 255 + nAlpha * nRed;
 563                     nGreen = (1 - nAlpha) * 255 + nAlpha * nGreen;
 564                     nBlue = (1 - nAlpha) * 255 + nAlpha * nBlue;
 565                     nColor = (nRed << 16) + (nGreen << 8) + nBlue;
 566                     m_aToken += OUString::number(nColor, 16);
 567                     nRet = CSS1_HEXCOLOR;
 568                     bNextCh = false;
 569
 570                     break;
 571                 }
 572
 573                 // otherwise we try a number
 574                 m_nInPos = nInPosSave;
 575                 m_cNextCh = cNextChSave;
 576                 m_nlLineNr = nlLineNrSave;
 577                 m_nlLinePos = nlLinePosSave;
 578                 m_bEOF = bEOFSave;
 579             }
 580
 581             nRet = CSS1_HASH;
 582             bNextCh = false;
 583             break;
 584
 585         case ' ':
 586         case '\t':
 587         case '\r':
 588         case '\n': // White-Space
 589             m_bWhiteSpace = true;
 590             break;
 591
 592         case sal_Unicode(EOF):
 593             if( IsEOF() )
 594             {
 595                 m_eState = CSS1_PAR_ACCEPTED;
 596                 bNextCh = false;
 597                 break;
 598             }
 599             [[fallthrough]];
 600
 601         default: // IDENT | syntax error
 602             if (rtl::isAsciiAlpha(m_cNextCh))
 603             {
 604                 // IDENT
 605
 606                 bool bHexColor = true;
 607
 608                 // parse the next identifier
 609                 OUStringBuffer sTmpBuffer(64);
 610                 do {
 611                     sTmpBuffer.append( m_cNextCh );
 612                     if( bHexColor )
 613                     {
 614                         bHexColor =  sTmpBuffer.getLength()<7 &&
 615                                      ( ('0'<=m_cNextCh && '9'>=m_cNextCh) ||
 616                                        ('A'<=m_cNextCh && 'F'>=m_cNextCh) ||
 617                                        ('a'<=m_cNextCh && 'f'>=m_cNextCh) );
 618                     }
 619                     m_cNextCh = GetNextChar();
 620                 } while( (rtl::isAsciiAlphanumeric(m_cNextCh) ||
 621                            '-' == m_cNextCh) && !IsEOF() );
 622
 623                 m_aToken += sTmpBuffer;
 624
 625                 if( bHexColor && sTmpBuffer.getLength()==6 )
 626                 {
 627                     bNextCh = false;
 628                     nRet = CSS1_HEXCOLOR;
 629
 630                     break;
 631                 }
 632                 if( '('==m_cNextCh &&
 633                     ( (('u'==m_aToken[0] || 'U'==m_aToken[0]) &&
 634                        m_aToken.equalsIgnoreAsciiCase( "url" )) ||
 635                       (('r'==m_aToken[0] || 'R'==m_aToken[0]) &&
 636                        (m_aToken.equalsIgnoreAsciiCase( "rgb" ) || m_aToken.equalsIgnoreAsciiCase( "rgba" ) )
 637                   ) ) )
 638                 {
 639                     int nNestCnt = 0;
 640                     OUStringBuffer sTmpBuffer2(64);
 641                     do {
 642                         sTmpBuffer2.append( m_cNextCh );
 643                         switch( m_cNextCh )
 644                         {
 645                         case '(':   nNestCnt++; break;
 646                         case ')':   nNestCnt--; break;
 647                         }
 648                         m_cNextCh = GetNextChar();
 649                     } while( (nNestCnt>1 || ')'!=m_cNextCh) && !IsEOF() );
 650                     sTmpBuffer2.append( m_cNextCh );
 651                     m_aToken += sTmpBuffer2;
 652                     bNextCh = true;
 653                     nRet = 'u'==m_aToken[0] || 'U'==m_aToken[0]
 654                                 ? CSS1_URL
 655                                 : CSS1_RGB;
 656                 }
 657                 else
 658                 {
 659                     bNextCh = false;
 660                     nRet = CSS1_IDENT;
 661                 }
 662             }
 663             // error handling: ignore digit
 664             break;
 665         }
 666         if( bNextCh )
 667             m_cNextCh = GetNextChar();
 668
 669     } while( CSS1_NULL==nRet && IsParserWorking() );
 670
 671     return nRet;
 672 }
 673
 674 // These functions implement the parser described in
 675
 676 //       http://www.w3.org/pub/WWW/TR/WD-css1.html
 677 // resp. http://www.w3.org/pub/WWW/TR/WD-css1-960220.html
 678
 679 // for CSS1. It's a direct implementation of the
 680 // described Lex grammar.
 681
 682 // stylesheet
 683 //  : import* rule*
 684
 685 // import
 686 //  : IMPORT_SYM url
 687
 688 // url
 689 //  : STRING
 690
 691 void CSS1Parser::ParseStyleSheet()
 692 {
 693     LOOP_CHECK_DECL
 694
 695     // import*
 696     bool bDone = false;
 697     while( !bDone && IsParserWorking() )
 698     {
 699         LOOP_CHECK_CHECK( "Infinite loop in ParseStyleSheet()/import *" )
 700
 701         switch( m_nToken )
 702         {
 703         case CSS1_IMPORT_SYM:
 704             // IMPORT_SYM url
 705             // URL are skipped without checks
 706             m_nToken = GetNextToken();
 707             break;
 708         case CSS1_IDENT:            // Look-Aheads
 709         case CSS1_DOT_W_WS:
 710         case CSS1_HASH:
 711         case CSS1_PAGE_SYM:
 712             // rule
 713             bDone = true;
 714             break;
 715         default:
 716             // error handling: ignore
 717             break;
 718         }
 719
 720         if( !bDone )
 721             m_nToken = GetNextToken();
 722     }
 723
 724     LOOP_CHECK_RESTART
 725
 726     // rule *
 727     while( IsParserWorking() )
 728     {
 729         LOOP_CHECK_CHECK( "Infinite loop in ParseStyleSheet()/rule *" )
 730
 731         switch( m_nToken )
 732         {
 733         case CSS1_IDENT:        // Look-Aheads
 734         case CSS1_DOT_W_WS:
 735         case CSS1_HASH:
 736         case CSS1_PAGE_SYM:
 737             // rule
 738             ParseRule();
 739             break;
 740         default:
 741             // error handling: ignore
 742             m_nToken = GetNextToken();
 743             break;
 744         }
 745     }
 746 }
 747
 748 // rule
 749 //  : selector [ ',' selector ]*
 750 //    '{' declaration [ ';' declaration ]* '}'
 751
 752 void CSS1Parser::ParseRule()
 753 {
 754     // selector
 755     std::unique_ptr<CSS1Selector> pSelector = ParseSelector();
 756     if( !pSelector )
 757         return;
 758
 759     // process selector
 760     SelectorParsed( std::move(pSelector), true );
 761
 762     LOOP_CHECK_DECL
 763
 764     // [ ',' selector ]*
 765     while( CSS1_COMMA==m_nToken && IsParserWorking() )
 766     {
 767         LOOP_CHECK_CHECK( "Infinite loop in ParseRule()/selector *" )
 768
 769         // ignore ','
 770         m_nToken = GetNextToken();
 771
 772         // selector
 773         pSelector = ParseSelector();
 774         if( !pSelector )
 775             return;
 776
 777         // process selector
 778         SelectorParsed( std::move(pSelector), false );
 779     }
 780
 781     // '{'
 782     if( CSS1_OBRACE != m_nToken )
 783         return;
 784     m_nToken = GetNextToken();
 785
 786     // declaration
 787     OUString aProperty;
 788     std::unique_ptr<CSS1Expression> pExpr = ParseDeclaration( aProperty );
 789     if( !pExpr )
 790         return;
 791
 792     // process expression
 793     DeclarationParsed( aProperty, std::move(pExpr) );
 794
 795     LOOP_CHECK_RESTART
 796
 797     // [ ';' declaration ]*
 798     while( CSS1_SEMICOLON==m_nToken && IsParserWorking() )
 799     {
 800         LOOP_CHECK_CHECK( "Infinite loop in ParseRule()/declaration *" )
 801
 802         // ';'
 803         m_nToken = GetNextToken();
 804
 805         // declaration
 806         if( CSS1_IDENT == m_nToken )
 807         {
 808             std::unique_ptr<CSS1Expression> pExp = ParseDeclaration( aProperty );
 809             if( pExp )
 810             {
 811                 // process expression
 812                 DeclarationParsed( aProperty, std::move(pExp));
 813             }
 814         }
 815     }
 816
 817     // '}'
 818     if( CSS1_CBRACE == m_nToken )
 819         m_nToken = GetNextToken();
 820 }
 821
 822 // selector
 823 //  : simple_selector+ [ ':' pseudo_element ]?
 824
 825 // simple_selector
 826 //  : element_name [ DOT_WO_WS class ]?
 827 //  | DOT_W_WS class
 828 //  | id_selector
 829
 830 // element_name
 831 //  : IDENT
 832
 833 // class
 834 //  : IDENT
 835
 836 // id_selector
 837 //  : '#' IDENT
 838
 839 // pseudo_element
 840 //  : IDENT
 841
 842 std::unique_ptr<CSS1Selector> CSS1Parser::ParseSelector()
 843 {
 844     std::unique_ptr<CSS1Selector> pRoot;
 845     CSS1Selector *pLast = nullptr;
 846
 847     bool bDone = false;
 848     CSS1Selector *pNew = nullptr;
 849
 850     LOOP_CHECK_DECL
 851
 852     // simple_selector+
 853     while( !bDone && IsParserWorking() )
 854     {
 855         LOOP_CHECK_CHECK( "Infinite loop in ParseSelector()" )
 856
 857         bool bNextToken = true;
 858
 859         switch( m_nToken )
 860         {
 861         case CSS1_IDENT:
 862             {
 863                 // element_name [ DOT_WO_WS class ]?
 864
 865                 // element_name
 866                 OUString aElement = m_aToken;
 867                 CSS1SelectorType eType = CSS1_SELTYPE_ELEMENT;
 868                 m_nToken = GetNextToken();
 869
 870                 if( CSS1_DOT_WO_WS == m_nToken )
 871                 {
 872                     // DOT_WO_WS
 873                     m_nToken = GetNextToken();
 874
 875                     // class
 876                     if( CSS1_IDENT == m_nToken )
 877                     {
 878                         aElement += "." + m_aToken;
 879                         eType = CSS1_SELTYPE_ELEM_CLASS;
 880                     }
 881                     else
 882                     {
 883                         // missing class
 884                         return pRoot;
 885                     }
 886                 }
 887                 else
 888                 {
 889                     // that was a look-ahead
 890                     bNextToken = false;
 891                 }
 892                 pNew = new CSS1Selector( eType, aElement );
 893             }
 894             break;
 895         case CSS1_DOT_W_WS:
 896             // DOT_W_WS class
 897
 898             // DOT_W_WS
 899             m_nToken = GetNextToken();
 900
 901             if( CSS1_IDENT==m_nToken )
 902             {
 903                 // class
 904                 pNew = new CSS1Selector( CSS1_SELTYPE_CLASS, m_aToken );
 905             }
 906             else
 907             {
 908                 // missing class
 909                 return pRoot;
 910             }
 911             break;
 912         case CSS1_HASH:
 913             // '#' id_selector
 914
 915             // '#'
 916             m_nToken = GetNextToken();
 917
 918             if( CSS1_IDENT==m_nToken )
 919             {
 920                 // id_selector
 921                 pNew = new CSS1Selector( CSS1_SELTYPE_ID, m_aToken );
 922             }
 923             else
 924             {
 925                 // missing id_selector
 926                 return pRoot;
 927             }
 928             break;
 929
 930         case CSS1_PAGE_SYM:
 931             {
 932                 //  @page
 933                 pNew = new CSS1Selector( CSS1_SELTYPE_PAGE, m_aToken );
 934             }
 935             break;
 936
 937         default:
 938             // stop because we don't know what's next
 939             bDone = true;
 940             break;
 941         }
 942
 943         // if created a new selector then save it
 944         if( pNew )
 945         {
 946             OSL_ENSURE( (pRoot!=nullptr) == (pLast!=nullptr),
 947                     "Root-Selector, but no Last" );
 948             if( pLast )
 949                 pLast->SetNext( pNew );
 950             else
 951                 pRoot.reset(pNew);
 952
 953             pLast = pNew;
 954             pNew = nullptr;
 955         }
 956
 957         if( bNextToken && !bDone )
 958             m_nToken = GetNextToken();
 959     }
 960
 961     if( !pRoot )
 962     {
 963         // missing simple_selector
 964         return pRoot;
 965     }
 966
 967     // [ ':' pseudo_element ]?
 968     if( CSS1_COLON==m_nToken && IsParserWorking() )
 969     {
 970         // ':' pseudo element
 971         m_nToken = GetNextToken();
 972         if( CSS1_IDENT==m_nToken )
 973         {
 974             if (pLast)
 975                 pLast->SetNext( new CSS1Selector(CSS1_SELTYPE_PSEUDO,m_aToken) );
 976             m_nToken = GetNextToken();
 977         }
 978         else
 979         {
 980             // missing pseudo_element
 981             return pRoot;
 982         }
 983     }
 984
 985     return pRoot;
 986 }
 987
 988 // declaration
 989 //  : property ':' expr prio?
 990 //  | /* empty */
 991
 992 // expression
 993 //  : term [ operator term ]*
 994
 995 // term
 996 //  : unary_operator?
 997 //     [ NUMBER | STRING | PERCENTAGE | LENGTH | EMS | EXS | IDENT |
 998 //       HEXCOLOR | URL | RGB ]
 999
1000 // operator
1001 //  : '/' | ',' | /* empty */
1002
1003 // unary_operator
1004 //  : '-' | '+'
1005
1006 // property
1007 //  : ident
1008
1009 // the sign is only used for numeric values (except PERCENTAGE)
1010 // and it's applied on nValue!
1011 std::unique_ptr<CSS1Expression> CSS1Parser::ParseDeclaration( OUString& rProperty )
1012 {
1013     std::unique_ptr<CSS1Expression> pRoot;
1014     CSS1Expression *pLast = nullptr;
1015
1016     // property
1017     if( CSS1_IDENT != m_nToken )
1018     {
1019         // missing property
1020         return pRoot;
1021     }
1022     rProperty = m_aToken;
1023
1024     m_nToken = GetNextToken();
1025
1026     // ':'
1027     if( CSS1_COLON != m_nToken )
1028     {
1029         // missing ':'
1030         return pRoot;
1031     }
1032     m_nToken = GetNextToken();
1033
1034     // term [operator term]*
1035     // here we're pretty lax regarding the syntax, but this shouldn't
1036     // be a problem
1037     bool bDone = false;
1038     sal_Unicode cSign = 0, cOp = 0;
1039     CSS1Expression *pNew = nullptr;
1040
1041     LOOP_CHECK_DECL
1042
1043     while( !bDone && IsParserWorking() )
1044     {
1045         LOOP_CHECK_CHECK( "Infinite loop in ParseDeclaration()" )
1046
1047         switch( m_nToken )
1048         {
1049         case CSS1_MINUS:
1050             cSign = '-';
1051             break;
1052
1053         case CSS1_PLUS:
1054             cSign = '+';
1055             break;
1056
1057         case CSS1_NUMBER:
1058         case CSS1_LENGTH:
1059         case CSS1_PIXLENGTH:
1060         case CSS1_EMS:
1061         case CSS1_EMX:
1062             if( '-'==cSign )
1063                 m_nValue = -m_nValue;
1064             [[fallthrough]];
1065         case CSS1_STRING:
1066         case CSS1_PERCENTAGE:
1067         case CSS1_IDENT:
1068         case CSS1_URL:
1069         case CSS1_RGB:
1070         case CSS1_HEXCOLOR:
1071             pNew = new CSS1Expression( m_nToken, m_aToken, m_nValue, cOp );
1072             m_nValue = 0; // otherwise this also is applied to next ident
1073             cSign = 0;
1074             cOp = 0;
1075             break;
1076
1077         case CSS1_SLASH:
1078             cOp = '/';
1079             cSign = 0;
1080             break;
1081
1082         case CSS1_COMMA:
1083             cOp = ',';
1084             cSign = 0;
1085             break;
1086
1087         default:
1088             bDone = true;
1089             break;
1090         }
1091
1092         // if created a new expression save it
1093         if( pNew )
1094         {
1095             OSL_ENSURE( (pRoot!=nullptr) == (pLast!=nullptr),
1096                     "Root-Selector, but no Last" );
1097             if( pLast )
1098                 pLast->SetNext( pNew );
1099             else
1100                 pRoot.reset(pNew);
1101
1102             pLast = pNew;
1103             pNew = nullptr;
1104         }
1105
1106         if( !bDone )
1107             m_nToken = GetNextToken();
1108     }
1109
1110     if( !pRoot )
1111     {
1112         // missing term
1113         return pRoot;
1114     }
1115
1116     // prio?
1117     if( CSS1_IMPORTANT_SYM==m_nToken )
1118     {
1119         // IMPORTANT_SYM
1120         m_nToken = GetNextToken();
1121     }
1122
1123     return pRoot;
1124 }
1125
// Sets up an idle parser: both flags (m_bWhiteSpace, m_bEOF) are cleared,
// the next character, the input position, the line counters and the
// numeric token value are zeroed, the state starts as CSS1_PAR_ACCEPTED
// and the current token as CSS1_NULL.
CSS1Parser::CSS1Parser()
    : m_bWhiteSpace(false)
    , m_bEOF(false)
    , m_cNextCh(0)
    , m_nInPos(0)
    , m_nlLineNr(0)
    , m_nlLinePos(0)
    , m_nValue(0)
    , m_eState(CSS1_PAR_ACCEPTED)
    , m_nToken(CSS1_NULL)
{
}
1138
// Nothing is released explicitly here; the body is intentionally empty.
CSS1Parser::~CSS1Parser()
{
}
1142
1143 void CSS1Parser::ParseStyleSheet( const OUString& rIn )
1144 {
1145     OUString aTmp( rIn );
1146
1147     sal_Unicode c;
1148     while( !aTmp.isEmpty() &&
1149            ( ' '==(c=aTmp[0]) || '\t'==c || '\r'==c || '\n'==c ) )
1150         aTmp = aTmp.copy( 1 );
1151
1152     while( !aTmp.isEmpty() && ( ' '==(c=aTmp[aTmp.getLength()-1])
1153            || '\t'==c || '\r'==c || '\n'==c ) )
1154         aTmp = aTmp.copy( 0, aTmp.getLength()-1 );
1155
1156     // remove SGML comments
1157     if( aTmp.getLength() >= 4 &&
1158         aTmp.startsWith( "<!--" ) )
1159         aTmp = aTmp.copy( 4 );
1160
1161     if( aTmp.getLength() >=3 &&
1162         aTmp.endsWith("-->") )
1163         aTmp = aTmp.copy( 0, aTmp.getLength() - 3 );
1164
1165     if( aTmp.isEmpty() )
1166         return;
1167
1168     InitRead( aTmp );
1169
1170     ParseStyleSheet();
1171 }
1172
1173 void CSS1Parser::ParseStyleOption( const OUString& rIn )
1174 {
1175     if( rIn.isEmpty() )
1176         return;
1177
1178     InitRead( rIn );
1179
1180     // fdo#41796: skip over spurious semicolons
1181     while (CSS1_SEMICOLON == m_nToken)
1182     {
1183         m_nToken = GetNextToken();
1184     }
1185
1186     OUString aProperty;
1187     std::unique_ptr<CSS1Expression> pExpr = ParseDeclaration( aProperty );
1188     if( !pExpr )
1189         return;
1190
1191     // process expression
1192     DeclarationParsed( aProperty, std::move(pExpr) );
1193
1194     LOOP_CHECK_DECL
1195
1196     // [ ';' declaration ]*
1197     while( CSS1_SEMICOLON==m_nToken && IsParserWorking() )
1198     {
1199         LOOP_CHECK_CHECK( "Infinite loop in ParseStyleOption()" )
1200
1201         m_nToken = GetNextToken();
1202         if( CSS1_IDENT==m_nToken )
1203         {
1204             std::unique_ptr<CSS1Expression> pExp = ParseDeclaration( aProperty );
1205             if( pExp )
1206             {
1207                 // process expression
1208                 DeclarationParsed( aProperty, std::move(pExp) );
1209             }
1210         }
1211     }
1212 }
1213
// Handler for a parsed selector; this implementation does nothing. The
// selector is owned by the by-value parameter and is therefore destroyed
// when the call returns.
// NOTE(review): presumably meant to be overridden by derived parsers --
// confirm against the declaration in parcss1.hxx.
void CSS1Parser::SelectorParsed( std::unique_ptr<CSS1Selector> /* pSelector */, bool /*bFirst*/ )
{
}
1217
// Handler for a parsed declaration; this implementation does nothing. The
// expression is owned by the by-value parameter and is therefore destroyed
// when the call returns.
// NOTE(review): presumably meant to be overridden by derived parsers --
// confirm against the declaration in parcss1.hxx.
void CSS1Parser::DeclarationParsed( const OUString& /*rProperty*/,
                                    std::unique_ptr<CSS1Expression> /* pExpr */ )
{
}
1222
// Deletes the element stored in m_pNext (deleting a null pointer is a
// no-op).
// NOTE(review): assuming m_pNext is a CSS1Selector*, its destructor runs in
// turn, so the rest of the chain is released recursively -- confirm in the
// header.
CSS1Selector::~CSS1Selector()
{
    delete m_pNext;
}
1227
// Deletes the element stored in pNext (deleting a null pointer is a
// no-op).
// NOTE(review): assuming pNext is a CSS1Expression*, its destructor runs in
// turn, so the rest of the chain is released recursively -- confirm in the
// header.
CSS1Expression::~CSS1Expression()
{
    delete pNext;
}
1232
1233 void CSS1Expression::GetURL( OUString& rURL  ) const
1234 {
1235     OSL_ENSURE( CSS1_URL==eType, "CSS1-Expression is not URL" );
1236
1237     OSL_ENSURE( aValue.startsWithIgnoreAsciiCase( "url" ) &&
1238                 aValue.getLength() > 5 &&
1239                 '(' == aValue[3] &&
1240                 ')' == aValue[aValue.getLength()-1],
1241                 "no valid URL(...)" );
1242
1243     if( aValue.getLength() <= 5 )
1244         return;
1245
1246     rURL = aValue.copy( 4, aValue.getLength() - 5 );
1247
1248     // tdf#94088 original stripped only spaces, but there may also be
1249     // double quotes in CSS style URLs, so be prepared to spaces followed
1250     // by a single quote followed by spaces
1251     const sal_Unicode aSpace(' ');
1252     const sal_Unicode aSingleQuote('\'');
1253
1254     rURL = comphelper::string::strip(rURL, aSpace);
1255     rURL = comphelper::string::strip(rURL, aSingleQuote);
1256     rURL = comphelper::string::strip(rURL, aSpace);
1257 }
1258
1259 bool CSS1Expression::GetColor( Color &rColor ) const
1260 {
1261     OSL_ENSURE( CSS1_IDENT==eType || CSS1_RGB==eType ||
1262                 CSS1_HEXCOLOR==eType || CSS1_STRING==eType,
1263                 "CSS1-Expression cannot be colour" );
1264
1265     bool bRet = false;
1266     sal_uInt32 nColor = SAL_MAX_UINT32;
1267
1268     switch( eType )
1269     {
1270     case CSS1_RGB:
1271         {
1272             // fourth value to 255 means no alpha transparency
1273             // so the right by default value
1274             sal_uInt8 aColors[4] = { 0, 0, 0, 255 };
1275
1276             // it can be "rgb" or "rgba"
1277             if (!aValue.startsWithIgnoreAsciiCase( "rgb" ) || aValue.getLength() < 6 ||
1278                     (aValue[3] != '(' && aValue[4] != '(' ) || aValue[aValue.getLength()-1] != ')')
1279             {
1280                 break;
1281             }
1282
1283             sal_Int32 nPos = aValue.startsWithIgnoreAsciiCase( "rgba" )?5:4; // start after "rgba(" or "rgb("
1284             char cSep = (aValue.indexOf(',') != -1)?',':' ';
1285             // alpha value can be after a "/" or ","
1286             bool bIsSepAlphaDiv = (aValue.indexOf('/') != -1)?true:false;
1287             for ( int nCol = 0; nCol < 4 && nPos > 0; ++nCol )
1288             {
1289                 const std::u16string_view aNumber = o3tl::getToken(aValue, 0, cSep, nPos);
1290
1291                 sal_Int32 nNumber = o3tl::toInt32(aNumber);
1292                 if( nNumber<0 )
1293                 {
1294                     nNumber = 0;
1295                 }
1296                 else if( aNumber.find('%') != std::u16string_view::npos )
1297                 {
1298                     if( nNumber > 100 )
1299                         nNumber = 100;
1300                     nNumber *= 255;
1301                     nNumber /= 100;
1302                 }
1303                 else if( nNumber > 255 )
1304                     nNumber = 255;
1305                 else if( aNumber.find('.') != std::u16string_view::npos )
1306                 {
1307                     // in this case aNumber contains something like "0.3" so not an sal_Int32
1308                     nNumber = static_cast<sal_Int32>(255.0*o3tl::toDouble(aNumber));
1309                 }
1310                 aColors[nCol] = static_cast<sal_uInt8>(nNumber);
1311                 // rgb with alpha and '/' has this form: rgb(255 0 0 / 50%)
1312                 if (bIsSepAlphaDiv && nCol == 2)
1313                 {
1314                     // but there can be some spaces or not before and after the "/", so skip them
1315                     while (aValue[nPos] == '/' || aValue[nPos] == ' ')
1316                       ++nPos;
1317                 }
1318             }
1319
1320             rColor.SetRed( aColors[0] );
1321             rColor.SetGreen( aColors[1] );
1322             rColor.SetBlue( aColors[2] );
1323             rColor.SetAlpha( aColors[3] );
1324
1325             bRet = true;    // something different than a colour isn't possible
1326         }
1327         break;
1328
1329     case CSS1_IDENT:
1330     case CSS1_STRING:
1331         {
1332             OUString aTmp( aValue.toAsciiUpperCase() );
1333             nColor = GetHTMLColor( aTmp );
1334             bRet = nColor != SAL_MAX_UINT32;
1335         }
1336         if( bRet || CSS1_STRING != eType || aValue.isEmpty() ||
1337             aValue[0] != '#' )
1338             break;
1339         [[fallthrough]];
1340     case CSS1_HEXCOLOR:
1341         {
1342             // MS-IE hack: colour can also be a string
1343             sal_Int32 nOffset = CSS1_STRING==eType ? 1 : 0;
1344             bool bDouble = aValue.getLength()-nOffset == 3;
1345             sal_Int32 i = nOffset, nEnd = (bDouble ? 3 : 6) + nOffset;
1346
1347             nColor = 0;
1348             for( ; i<nEnd; i++ )
1349             {
1350                 sal_Unicode c = (i<aValue.getLength() ? aValue[i]
1351                                                          : '0' );
1352                 if( c >= '0' && c <= '9' )
1353                     c -= 48;
1354                 else if( c >= 'A' && c <= 'F' )
1355                     c -= 55;
1356                 else if( c >= 'a' && c <= 'f' )
1357                     c -= 87;
1358                 else
1359                     c = 16;
1360
1361                 nColor *= 16;
1362                 if( c<16 )
1363                     nColor += c;
1364                 if( bDouble )
1365                 {
1366                     nColor *= 16;
1367                     if( c<16 )
1368                         nColor += c;
1369                 }
1370             }
1371             bRet = true;
1372         }
1373         break;
1374     default:
1375         ;
1376     }
1377
1378     if( bRet && nColor!=SAL_MAX_UINT32 )
1379     {
1380         rColor.SetRed( static_cast<sal_uInt8>((nColor & 0x00ff0000UL) >> 16) );
1381         rColor.SetGreen( static_cast<sal_uInt8>((nColor & 0x0000ff00UL) >> 8) );
1382         rColor.SetBlue( static_cast<sal_uInt8>(nColor & 0x000000ffUL) );
1383     }
1384
1385     return bRet;
1386 }
1387
1388 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */