sw/source/filter/html/parcss1.cxx

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /*
   3  * This file is part of the LibreOffice project.
   4  *
   5  * This Source Code Form is subject to the terms of the Mozilla Public
   6  * License, v. 2.0. If a copy of the MPL was not distributed with this
   7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
   8  *
   9  * This file incorporates work covered by the following license notice:
  10  *
  11  *   Licensed to the Apache Software Foundation (ASF) under one or more
  12  *   contributor license agreements. See the NOTICE file distributed
  13  *   with this work for additional information regarding copyright
  14  *   ownership. The ASF licenses this file to you under the Apache
  15  *   License, Version 2.0 (the "License"); you may not use this file
  16  *   except in compliance with the License. You may obtain a copy of
  17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
  18  */
  19
  20 #include <o3tl/string_view.hxx>
  21 #include <o3tl/unit_conversion.hxx>
  22 #include <osl/diagnose.h>
  23 #include <rtl/character.hxx>
  24 #include <rtl/ustrbuf.hxx>
  25 #include <tools/color.hxx>
  26 #include <tools/solar.h>
  27 #include <svtools/htmltokn.h>
  28 #include <comphelper/string.hxx>
  29 #include "parcss1.hxx"
  30
  31 // Loop-Check: Used to avoid infinite loops, is checked after every
  32 // loop, if there is progress of the input position
  33 #define LOOP_CHECK
  34
  35 #ifdef LOOP_CHECK
  36
  37 #define LOOP_CHECK_DECL \
  38     sal_Int32 nOldInPos = SAL_MAX_INT32;
  39 #define LOOP_CHECK_RESTART \
  40     nOldInPos = SAL_MAX_INT32;
  41 #define LOOP_CHECK_CHECK( where ) \
  42     OSL_ENSURE( nOldInPos!=m_nInPos || m_cNextCh==sal_Unicode(EOF), where );    \
  43     if( nOldInPos==m_nInPos && m_cNextCh!=sal_Unicode(EOF) )                    \
  44         break;                                                              \
  45     else                                                                    \
  46         nOldInPos = m_nInPos;
  47
  48 #else
  49
  50 #define LOOP_CHECK_DECL
  51 #define LOOP_CHECK_RESTART
  52 #define LOOP_CHECK_CHECK( where )
  53
  54 #endif
  55
  56 const sal_Int32 MAX_LEN = 1024;
  57
  58 void CSS1Parser::InitRead( const OUString& rIn )
  59 {
  60     m_nlLineNr = 0;
  61     m_nlLinePos = 0;
  62
  63     m_bWhiteSpace = true; // if nothing was read it's like there was WS
  64     m_bEOF = false;
  65     m_eState = CSS1_PAR_WORKING;
  66     m_nValue = 0.;
  67
  68     m_aIn = rIn;
  69     m_nInPos = 0;
  70     m_cNextCh = GetNextChar();
  71     m_nToken = GetNextToken();
  72 }
  73
  74 sal_Unicode CSS1Parser::GetNextChar()
  75 {
  76     if( m_nInPos >= m_aIn.getLength() )
  77     {
  78         m_bEOF = true;
  79         return sal_Unicode(EOF);
  80     }
  81
  82     sal_Unicode c = m_aIn[m_nInPos];
  83     m_nInPos++;
  84
  85     if( c == '\n' )
  86     {
  87         ++m_nlLineNr;
  88         m_nlLinePos = 1;
  89     }
  90     else
  91         ++m_nlLinePos;
  92
  93     return c;
  94 }
  95
  96 // This function implements the scanner described in
  97
  98 //       http://www.w3.org/pub/WWW/TR/WD-css1.html
  99 // resp. http://www.w3.org/pub/WWW/TR/WD-css1-960220.html
 100
 101 // for CSS1. It's a direct implementation of the
 102 // described Lex grammar.
 103
 104 CSS1Token CSS1Parser::GetNextToken()
 105 {
 106     CSS1Token nRet = CSS1_NULL;
 107     m_aToken.clear();
 108
 109     do {
 110         // remember if white space was read
 111         bool bPrevWhiteSpace = m_bWhiteSpace;
 112         m_bWhiteSpace = false;
 113
 114         bool bNextCh = true;
 115         switch( m_cNextCh )
 116         {
 117         case '/': // COMMENT | '/'
 118             {
 119                 m_cNextCh = GetNextChar();
 120                 if( '*' == m_cNextCh )
 121                 {
 122                     // COMMENT
 123                     m_cNextCh = GetNextChar();
 124
 125                     bool bAsterisk = false;
 126                     while( !(bAsterisk && '/'==m_cNextCh) && !IsEOF() )
 127                     {
 128                         bAsterisk = ('*'==m_cNextCh);
 129                         m_cNextCh = GetNextChar();
 130                     }
 131                 }
 132                 else
 133                 {
 134                     // '/'
 135                     bNextCh = false;
 136                     nRet = CSS1_SLASH;
 137                 }
 138             }
 139             break;
 140
 141         case '@': // '@import' | '@XXX'
 142             {
 143                 m_cNextCh = GetNextChar();
 144                 if (rtl::isAsciiAlpha(m_cNextCh))
 145                 {
 146                     // scan the next identifier
 147                     OUStringBuffer sTmpBuffer(32);
 148                     do {
 149                         sTmpBuffer.append( m_cNextCh );
 150                         m_cNextCh = GetNextChar();
 151                     } while( (rtl::isAsciiAlphanumeric(m_cNextCh) ||
 152                              '-' == m_cNextCh) && !IsEOF() );
 153
 154                     m_aToken += sTmpBuffer;
 155
 156                     // check if we know it
 157                     switch( m_aToken[0] )
 158                     {
 159                     case 'i':
 160                     case 'I':
 161                         if( m_aToken.equalsIgnoreAsciiCase( "import" ) )
 162                             nRet = CSS1_IMPORT_SYM;
 163                         break;
 164                     case 'p':
 165                     case 'P':
 166                         if( m_aToken.equalsIgnoreAsciiCase( "page" ) )
 167                             nRet = CSS1_PAGE_SYM;
 168                         break;
 169                     }
 170
 171                     // error handling: ignore '@indent' and the rest until
 172                     // semicolon at end of the next block
 173                     if( CSS1_NULL==nRet )
 174                     {
 175                         m_aToken.clear();
 176                         int nBlockLvl = 0;
 177                         sal_Unicode cQuoteCh = 0;
 178                         bool bDone = false, bEscape = false;
 179                         while( !bDone && !IsEOF() )
 180                         {
 181                             bool bOldEscape = bEscape;
 182                             bEscape = false;
 183                             switch( m_cNextCh )
 184                             {
 185                             case '{':
 186                                 if( !cQuoteCh && !bOldEscape )
 187                                     nBlockLvl++;
 188                                 break;
 189                             case ';':
 190                                 if( !cQuoteCh && !bOldEscape )
 191                                     bDone = nBlockLvl==0;
 192                                 break;
 193                             case '}':
 194                                 if( !cQuoteCh && !bOldEscape )
 195                                     bDone = --nBlockLvl==0;
 196                                 break;
 197                             case '\"':
 198                             case '\'':
 199                                 if( !bOldEscape )
 200                                 {
 201                                     if( cQuoteCh )
 202                                     {
 203                                         if( cQuoteCh == m_cNextCh )
 204                                             cQuoteCh = 0;
 205                                     }
 206                                     else
 207                                     {
 208                                         cQuoteCh = m_cNextCh;
 209                                     }
 210                                 }
 211                                 break;
 212                             case '\\':
 213                                 if( !bOldEscape )
 214                                     bEscape = true;
 215                                 break;
 216                             }
 217                             m_cNextCh = GetNextChar();
 218                         }
 219                     }
 220
 221                     bNextCh = false;
 222                 }
 223             }
 224             break;
 225
 226         case '!': // '!' 'legal' | '!' 'important' | syntax error
 227             {
 228                 // ignore white space
 229                 m_cNextCh = GetNextChar();
 230                 while( ( ' ' == m_cNextCh ||
 231                        (m_cNextCh >= 0x09 && m_cNextCh <= 0x0d) ) && !IsEOF() )
 232                 {
 233                     m_bWhiteSpace = true;
 234                     m_cNextCh = GetNextChar();
 235                 }
 236
 237                 if( 'i'==m_cNextCh || 'I'==m_cNextCh)
 238                 {
 239                     // scan next identifier
 240                     OUStringBuffer sTmpBuffer(32);
 241                     do {
 242                         sTmpBuffer.append( m_cNextCh );
 243                         m_cNextCh = GetNextChar();
 244                     } while( (rtl::isAsciiAlphanumeric(m_cNextCh) ||
 245                              '-' == m_cNextCh) && !IsEOF() );
 246
 247                     m_aToken += sTmpBuffer;
 248
 249                     if( ( 'i'==m_aToken[0] || 'I'==m_aToken[0] ) &&
 250                         m_aToken.equalsIgnoreAsciiCase( "important" ) )
 251                     {
 252                         // '!' 'important'
 253                         nRet = CSS1_IMPORTANT_SYM;
 254                     }
 255                     else
 256                     {
 257                         // error handling: ignore '!', not IDENT
 258                         nRet = CSS1_IDENT;
 259                     }
 260
 261                     m_bWhiteSpace = false;
 262                     bNextCh = false;
 263                 }
 264                 else
 265                 {
 266                     // error handling: ignore '!'
 267                     bNextCh = false;
 268                 }
 269             }
 270             break;
 271
 272         case '\"':
 273         case '\'': // STRING
 274             {
 275                 // \... isn't possible yet!!!
 276                 sal_Unicode cQuoteChar = m_cNextCh;
 277                 m_cNextCh = GetNextChar();
 278
 279                 OUStringBuffer sTmpBuffer( MAX_LEN );
 280                 do {
 281                     sTmpBuffer.append( m_cNextCh );
 282                     m_cNextCh = GetNextChar();
 283                 } while( cQuoteChar != m_cNextCh && !IsEOF() );
 284
 285                 m_aToken += sTmpBuffer;
 286
 287                 nRet = CSS1_STRING;
 288             }
 289             break;
 290
 291         case '0':
 292         case '1':
 293         case '2':
 294         case '3':
 295         case '4':
 296         case '5':
 297         case '6':
 298         case '7':
 299         case '8':
 300         case '9': // NUMBER | PERCENTAGE | LENGTH
 301             {
 302                 // save current position
 303                 std::size_t nInPosSave = m_nInPos;
 304                 sal_Unicode cNextChSave = m_cNextCh;
 305                 sal_uInt32 nlLineNrSave = m_nlLineNr;
 306                 sal_uInt32 nlLinePosSave = m_nlLinePos;
 307                 bool bEOFSave = m_bEOF;
 308
 309                 // first try to parse a hex digit
 310                 OUStringBuffer sTmpBuffer( 16 );
 311                 do {
 312                     sTmpBuffer.append( m_cNextCh );
 313                     m_cNextCh = GetNextChar();
 314                 } while( sTmpBuffer.getLength() < 7 &&
 315                          ( ('0'<=m_cNextCh && '9'>=m_cNextCh) ||
 316                            ('A'<=m_cNextCh && 'F'>=m_cNextCh) ||
 317                            ('a'<=m_cNextCh && 'f'>=m_cNextCh) ) &&
 318                          !IsEOF() );
 319
 320                 if( sTmpBuffer.getLength()==6 )
 321                 {
 322                     // we found a color in hex
 323                     m_aToken += sTmpBuffer;
 324                     nRet = CSS1_HEXCOLOR;
 325                     bNextCh = false;
 326
 327                     break;
 328                 }
 329
 330                 // otherwise we try a number
 331                 m_nInPos = nInPosSave;
 332                 m_cNextCh = cNextChSave;
 333                 m_nlLineNr = nlLineNrSave;
 334                 m_nlLinePos = nlLinePosSave;
 335                 m_bEOF = bEOFSave;
 336
 337                 // first parse the number
 338                 sTmpBuffer.setLength( 0 );
 339                 do {
 340                     sTmpBuffer.append( m_cNextCh );
 341                     m_cNextCh = GetNextChar();
 342                 } while( (('0'<=m_cNextCh && '9'>=m_cNextCh) || '.'==m_cNextCh) &&
 343                          !IsEOF() );
 344
 345                 m_aToken += sTmpBuffer;
 346                 m_nValue = m_aToken.toDouble();
 347
 348                 // ignore white space
 349                 while( ( ' ' == m_cNextCh ||
 350                        (m_cNextCh >= 0x09 && m_cNextCh <= 0x0d) ) && !IsEOF() )
 351                 {
 352                     m_bWhiteSpace = true;
 353                     m_cNextCh = GetNextChar();
 354                 }
 355
 356                 // check now, of there is a unit
 357                 switch( m_cNextCh )
 358                 {
 359                 case '%': // PERCENTAGE
 360                     m_bWhiteSpace = false;
 361                     nRet = CSS1_PERCENTAGE;
 362                     break;
 363
 364                 case 'c':
 365                 case 'C': // LENGTH cm | LENGTH IDENT
 366                 case 'e':
 367                 case 'E': // LENGTH (em | ex) | LENGTH IDENT
 368                 case 'i':
 369                 case 'I': // LENGTH inch | LENGTH IDENT
 370                 case 'p':
 371                 case 'P': // LENGTH (pt | px | pc) | LENGTH IDENT
 372                 case 'm':
 373                 case 'M': // LENGTH mm | LENGTH IDENT
 374                     {
 375                         // save current position
 376                         sal_Int32 nInPosOld = m_nInPos;
 377                         sal_Unicode cNextChOld = m_cNextCh;
 378                         sal_uInt32 nlLineNrOld  = m_nlLineNr;
 379                         sal_uInt32 nlLinePosOld = m_nlLinePos;
 380                         bool bEOFOld = m_bEOF;
 381
 382                         // parse the next identifier
 383                         OUStringBuffer sTmpBuffer2(64);
 384                         do {
 385                             sTmpBuffer2.append(static_cast<sal_Unicode>(rtl::toAsciiLowerCase(m_cNextCh)));
 386                             m_cNextCh = GetNextChar();
 387                         } while( (rtl::isAsciiAlphanumeric(m_cNextCh) ||
 388                                  '-' == m_cNextCh) && !IsEOF() );
 389
 390                         OUString aIdent = sTmpBuffer2.makeStringAndClear();
 391                         nRet = CSS1_NUMBER;
 392
 393                         // Is it a unit?
 394                         switch( aIdent[0] )
 395                         {
 396                         case 'c':
 397                             if (aIdent == "cm")
 398                             {
 399                                 m_nValue = o3tl::convert(m_nValue, o3tl::Length::cm, o3tl::Length::twip);
 400                                 nRet = CSS1_LENGTH;
 401                             }
 402                             break;
 403                         case 'e':
 404                             if (aIdent == "em")
 405                                 nRet = CSS1_EMS;
 406                             else if (aIdent == "ex")
 407                                 nRet = CSS1_EMX;
 408                             break;
 409                         case 'i':
 410                             if (aIdent == "in")
 411                             {
 412                                 nRet = CSS1_LENGTH;
 413                                 m_nValue = o3tl::convert(m_nValue, o3tl::Length::in, o3tl::Length::twip);
 414                             }
 415                             break;
 416                         case 'm':
 417                             if (aIdent == "mm")
 418                             {
 419                                 nRet = CSS1_LENGTH;
 420                                 m_nValue = o3tl::convert(m_nValue, o3tl::Length::mm, o3tl::Length::twip);
 421                             }
 422                             break;
 423                         case 'p':
 424                             if (aIdent == "pt")
 425                             {
 426                                 nRet = CSS1_LENGTH;
 427                                 m_nValue = o3tl::convert(m_nValue, o3tl::Length::pt, o3tl::Length::twip);
 428                             }
 429                             else if (aIdent == "pc")
 430                             {
 431                                 nRet = CSS1_LENGTH;
 432                                 m_nValue = o3tl::convert(m_nValue, o3tl::Length::pc, o3tl::Length::twip);
 433                             }
 434                             else if (aIdent == "px")
 435                                 nRet = CSS1_PIXLENGTH;
 436                             break;
 437                         }
 438
 439                         if( nRet == CSS1_NUMBER )
 440                         {
 441                             m_nInPos = nInPosOld;
 442                             m_cNextCh = cNextChOld;
 443                             m_nlLineNr = nlLineNrOld;
 444                             m_nlLinePos = nlLinePosOld;
 445                             m_bEOF = bEOFOld;
 446                         }
 447                         else
 448                         {
 449                             m_bWhiteSpace = false;
 450                         }
 451                         bNextCh = false;
 452                     }
 453                     break;
 454                 default: // NUMBER IDENT
 455                     bNextCh = false;
 456                     nRet = CSS1_NUMBER;
 457                     break;
 458                 }
 459             }
 460             break;
 461
 462         case ':': // ':'
 463             // catch link/visited/active !!!
 464             nRet = CSS1_COLON;
 465             break;
 466
 467         case '.': // DOT_W_WS | DOT_WO_WS
 468             nRet = bPrevWhiteSpace ? CSS1_DOT_W_WS : CSS1_DOT_WO_WS;
 469             break;
 470
 471         case '+': // '+'
 472             nRet = CSS1_PLUS;
 473             break;
 474
 475         case '-': // '-'
 476             nRet = CSS1_MINUS;
 477             break;
 478
 479         case '{': // '{'
 480             nRet = CSS1_OBRACE;
 481             break;
 482
 483         case '}': // '}'
 484             nRet = CSS1_CBRACE;
 485             break;
 486
 487         case ';': // ';'
 488             nRet = CSS1_SEMICOLON;
 489             break;
 490
 491         case ',': // ','
 492             nRet = CSS1_COMMA;
 493             break;
 494
 495         case '#': // '#'
 496             m_cNextCh = GetNextChar();
 497             if( ('0'<=m_cNextCh && '9'>=m_cNextCh) ||
 498                 ('a'<=m_cNextCh && 'f'>=m_cNextCh) ||
 499                 ('A'<=m_cNextCh && 'F'>=m_cNextCh) )
 500             {
 501                 // save current position
 502                 sal_Int32 nInPosSave = m_nInPos;
 503                 sal_Unicode cNextChSave = m_cNextCh;
 504                 sal_uInt32 nlLineNrSave = m_nlLineNr;
 505                 sal_uInt32 nlLinePosSave = m_nlLinePos;
 506                 bool bEOFSave = m_bEOF;
 507
 508                 // first try to parse a hex digit
 509                 OUStringBuffer sTmpBuffer(8);
 510                 do {
 511                     sTmpBuffer.append( m_cNextCh );
 512                     m_cNextCh = GetNextChar();
 513                 } while( sTmpBuffer.getLength() < 9 &&
 514                          ( ('0'<=m_cNextCh && '9'>=m_cNextCh) ||
 515                            ('A'<=m_cNextCh && 'F'>=m_cNextCh) ||
 516                            ('a'<=m_cNextCh && 'f'>=m_cNextCh) ) &&
 517                          !IsEOF() );
 518
 519                 if( sTmpBuffer.getLength()==6 || sTmpBuffer.getLength()==3 )
 520                 {
 521                     // we found a color in hex (RGB)
 522                     m_aToken += sTmpBuffer;
 523                     nRet = CSS1_HEXCOLOR;
 524                     bNextCh = false;
 525
 526                     break;
 527                 }
 528
 529                 if( sTmpBuffer.getLength()==8 )
 530                 {
 531                     // we found a color in hex (RGBA)
 532                     // we convert it to RGB assuming white background
 533                     sal_uInt32 nColor = sTmpBuffer.makeStringAndClear().toUInt32(16);
 534                     sal_uInt32 nRed = (nColor & 0xff000000) >> 24;
 535                     sal_uInt32 nGreen = (nColor & 0xff0000) >> 16;
 536                     sal_uInt32 nBlue = (nColor & 0xff00) >> 8;
 537                     double nAlpha = (nColor & 0xff) / 255.0;
 538                     nRed = (1 - nAlpha) * 255 + nAlpha * nRed;
 539                     nGreen = (1 - nAlpha) * 255 + nAlpha * nGreen;
 540                     nBlue = (1 - nAlpha) * 255 + nAlpha * nBlue;
 541                     nColor = (nRed << 16) + (nGreen << 8) + nBlue;
 542                     m_aToken += OUString::number(nColor, 16);
 543                     nRet = CSS1_HEXCOLOR;
 544                     bNextCh = false;
 545
 546                     break;
 547                 }
 548
 549                 // otherwise we try a number
 550                 m_nInPos = nInPosSave;
 551                 m_cNextCh = cNextChSave;
 552                 m_nlLineNr = nlLineNrSave;
 553                 m_nlLinePos = nlLinePosSave;
 554                 m_bEOF = bEOFSave;
 555             }
 556
 557             nRet = CSS1_HASH;
 558             bNextCh = false;
 559             break;
 560
 561         case ' ':
 562         case '\t':
 563         case '\r':
 564         case '\n': // White-Space
 565             m_bWhiteSpace = true;
 566             break;
 567
 568         case sal_Unicode(EOF):
 569             if( IsEOF() )
 570             {
 571                 m_eState = CSS1_PAR_ACCEPTED;
 572                 bNextCh = false;
 573                 break;
 574             }
 575             [[fallthrough]];
 576
 577         default: // IDENT | syntax error
 578             if (rtl::isAsciiAlpha(m_cNextCh))
 579             {
 580                 // IDENT
 581
 582                 bool bHexColor = true;
 583
 584                 // parse the next identifier
 585                 OUStringBuffer sTmpBuffer(64);
 586                 do {
 587                     sTmpBuffer.append( m_cNextCh );
 588                     if( bHexColor )
 589                     {
 590                         bHexColor =  sTmpBuffer.getLength()<7 &&
 591                                      ( ('0'<=m_cNextCh && '9'>=m_cNextCh) ||
 592                                        ('A'<=m_cNextCh && 'F'>=m_cNextCh) ||
 593                                        ('a'<=m_cNextCh && 'f'>=m_cNextCh) );
 594                     }
 595                     m_cNextCh = GetNextChar();
 596                 } while( (rtl::isAsciiAlphanumeric(m_cNextCh) ||
 597                            '-' == m_cNextCh) && !IsEOF() );
 598
 599                 m_aToken += sTmpBuffer;
 600
 601                 if( bHexColor && sTmpBuffer.getLength()==6 )
 602                 {
 603                     bNextCh = false;
 604                     nRet = CSS1_HEXCOLOR;
 605
 606                     break;
 607                 }
 608                 if( '('==m_cNextCh &&
 609                     ( (('u'==m_aToken[0] || 'U'==m_aToken[0]) &&
 610                        m_aToken.equalsIgnoreAsciiCase( "url" )) ||
 611                       (('r'==m_aToken[0] || 'R'==m_aToken[0]) &&
 612                        (m_aToken.equalsIgnoreAsciiCase( "rgb" ) || m_aToken.equalsIgnoreAsciiCase( "rgba" ) )
 613                   ) ) )
 614                 {
 615                     int nNestCnt = 0;
 616                     OUStringBuffer sTmpBuffer2(64);
 617                     do {
 618                         sTmpBuffer2.append( m_cNextCh );
 619                         switch( m_cNextCh )
 620                         {
 621                         case '(':   nNestCnt++; break;
 622                         case ')':   nNestCnt--; break;
 623                         }
 624                         m_cNextCh = GetNextChar();
 625                     } while( (nNestCnt>1 || ')'!=m_cNextCh) && !IsEOF() );
 626                     sTmpBuffer2.append( m_cNextCh );
 627                     m_aToken += sTmpBuffer2;
 628                     bNextCh = true;
 629                     nRet = 'u'==m_aToken[0] || 'U'==m_aToken[0]
 630                                 ? CSS1_URL
 631                                 : CSS1_RGB;
 632                 }
 633                 else
 634                 {
 635                     bNextCh = false;
 636                     nRet = CSS1_IDENT;
 637                 }
 638             }
 639             // error handling: ignore digit
 640             break;
 641         }
 642         if( bNextCh )
 643             m_cNextCh = GetNextChar();
 644
 645     } while( CSS1_NULL==nRet && IsParserWorking() );
 646
 647     return nRet;
 648 }
 649
 650 // These functions implement the parser described in
 651
 652 //       http://www.w3.org/pub/WWW/TR/WD-css1.html
 653 // resp. http://www.w3.org/pub/WWW/TR/WD-css1-960220.html
 654
 655 // for CSS1. It's a direct implementation of the
 656 // described Lex grammar.
 657
 658 // stylesheet
 659 //  : import* rule*
 660
 661 // import
 662 //  : IMPORT_SYM url
 663
 664 // url
 665 //  : STRING
 666
 667 void CSS1Parser::ParseStyleSheet()
 668 {
 669     LOOP_CHECK_DECL
 670
 671     // import*
 672     bool bDone = false;
 673     while( !bDone && IsParserWorking() )
 674     {
 675         LOOP_CHECK_CHECK( "Infinite loop in ParseStyleSheet()/import *" )
 676
 677         switch( m_nToken )
 678         {
 679         case CSS1_IMPORT_SYM:
 680             // IMPORT_SYM url
 681             // URL are skipped without checks
 682             m_nToken = GetNextToken();
 683             break;
 684         case CSS1_IDENT:            // Look-Aheads
 685         case CSS1_DOT_W_WS:
 686         case CSS1_HASH:
 687         case CSS1_PAGE_SYM:
 688             // rule
 689             bDone = true;
 690             break;
 691         default:
 692             // error handling: ignore
 693             break;
 694         }
 695
 696         if( !bDone )
 697             m_nToken = GetNextToken();
 698     }
 699
 700     LOOP_CHECK_RESTART
 701
 702     // rule *
 703     while( IsParserWorking() )
 704     {
 705         LOOP_CHECK_CHECK( "Infinite loop in ParseStyleSheet()/rule *" )
 706
 707         switch( m_nToken )
 708         {
 709         case CSS1_IDENT:        // Look-Aheads
 710         case CSS1_DOT_W_WS:
 711         case CSS1_HASH:
 712         case CSS1_PAGE_SYM:
 713             // rule
 714             ParseRule();
 715             break;
 716         default:
 717             // error handling: ignore
 718             m_nToken = GetNextToken();
 719             break;
 720         }
 721     }
 722 }
 723
 724 // rule
 725 //  : selector [ ',' selector ]*
 726 //    '{' declaration [ ';' declaration ]* '}'
 727
 728 void CSS1Parser::ParseRule()
 729 {
 730     // selector
 731     std::unique_ptr<CSS1Selector> pSelector = ParseSelector();
 732     if( !pSelector )
 733         return;
 734
 735     // process selector
 736     SelectorParsed( std::move(pSelector), true );
 737
 738     LOOP_CHECK_DECL
 739
 740     // [ ',' selector ]*
 741     while( CSS1_COMMA==m_nToken && IsParserWorking() )
 742     {
 743         LOOP_CHECK_CHECK( "Infinite loop in ParseRule()/selector *" )
 744
 745         // ignore ','
 746         m_nToken = GetNextToken();
 747
 748         // selector
 749         pSelector = ParseSelector();
 750         if( !pSelector )
 751             return;
 752
 753         // process selector
 754         SelectorParsed( std::move(pSelector), false );
 755     }
 756
 757     // '{'
 758     if( CSS1_OBRACE != m_nToken )
 759         return;
 760     m_nToken = GetNextToken();
 761
 762     // declaration
 763     OUString aProperty;
 764     std::unique_ptr<CSS1Expression> pExpr = ParseDeclaration( aProperty );
 765     if( !pExpr )
 766         return;
 767
 768     // process expression
 769     DeclarationParsed( aProperty, std::move(pExpr) );
 770
 771     LOOP_CHECK_RESTART
 772
 773     // [ ';' declaration ]*
 774     while( CSS1_SEMICOLON==m_nToken && IsParserWorking() )
 775     {
 776         LOOP_CHECK_CHECK( "Infinite loop in ParseRule()/declaration *" )
 777
 778         // ';'
 779         m_nToken = GetNextToken();
 780
 781         // declaration
 782         if( CSS1_IDENT == m_nToken )
 783         {
 784             std::unique_ptr<CSS1Expression> pExp = ParseDeclaration( aProperty );
 785             if( pExp )
 786             {
 787                 // process expression
 788                 DeclarationParsed( aProperty, std::move(pExp));
 789             }
 790         }
 791     }
 792
 793     // '}'
 794     if( CSS1_CBRACE == m_nToken )
 795         m_nToken = GetNextToken();
 796 }
 797
 798 // selector
 799 //  : simple_selector+ [ ':' pseudo_element ]?
 800
 801 // simple_selector
 802 //  : element_name [ DOT_WO_WS class ]?
 803 //  | DOT_W_WS class
 804 //  | id_selector
 805
 806 // element_name
 807 //  : IDENT
 808
 809 // class
 810 //  : IDENT
 811
 812 // id_selector
 813 //  : '#' IDENT
 814
 815 // pseudo_element
 816 //  : IDENT
 817
 818 std::unique_ptr<CSS1Selector> CSS1Parser::ParseSelector()
 819 {
 820     std::unique_ptr<CSS1Selector> pRoot;
 821     CSS1Selector *pLast = nullptr;
 822
 823     bool bDone = false;
 824     CSS1Selector *pNew = nullptr;
 825
 826     LOOP_CHECK_DECL
 827
 828     // simple_selector+
 829     while( !bDone && IsParserWorking() )
 830     {
 831         LOOP_CHECK_CHECK( "Infinite loop in ParseSelector()" )
 832
 833         bool bNextToken = true;
 834
 835         switch( m_nToken )
 836         {
 837         case CSS1_IDENT:
 838             {
 839                 // element_name [ DOT_WO_WS class ]?
 840
 841                 // element_name
 842                 OUString aElement = m_aToken;
 843                 CSS1SelectorType eType = CSS1_SELTYPE_ELEMENT;
 844                 m_nToken = GetNextToken();
 845
 846                 if( CSS1_DOT_WO_WS == m_nToken )
 847                 {
 848                     // DOT_WO_WS
 849                     m_nToken = GetNextToken();
 850
 851                     // class
 852                     if( CSS1_IDENT == m_nToken )
 853                     {
 854                         aElement += "." + m_aToken;
 855                         eType = CSS1_SELTYPE_ELEM_CLASS;
 856                     }
 857                     else
 858                     {
 859                         // missing class
 860                         return pRoot;
 861                     }
 862                 }
 863                 else
 864                 {
 865                     // that was a look-ahead
 866                     bNextToken = false;
 867                 }
 868                 pNew = new CSS1Selector( eType, aElement );
 869             }
 870             break;
 871         case CSS1_DOT_W_WS:
 872             // DOT_W_WS class
 873
 874             // DOT_W_WS
 875             m_nToken = GetNextToken();
 876
 877             if( CSS1_IDENT==m_nToken )
 878             {
 879                 // class
 880                 pNew = new CSS1Selector( CSS1_SELTYPE_CLASS, m_aToken );
 881             }
 882             else
 883             {
 884                 // missing class
 885                 return pRoot;
 886             }
 887             break;
 888         case CSS1_HASH:
 889             // '#' id_selector
 890
 891             // '#'
 892             m_nToken = GetNextToken();
 893
 894             if( CSS1_IDENT==m_nToken )
 895             {
 896                 // id_selector
 897                 pNew = new CSS1Selector( CSS1_SELTYPE_ID, m_aToken );
 898             }
 899             else
 900             {
 901                 // missing id_selector
 902                 return pRoot;
 903             }
 904             break;
 905
 906         case CSS1_PAGE_SYM:
 907             {
 908                 //  @page
 909                 pNew = new CSS1Selector( CSS1_SELTYPE_PAGE, m_aToken );
 910             }
 911             break;
 912
 913         default:
 914             // stop because we don't know what's next
 915             bDone = true;
 916             break;
 917         }
 918
 919         // if created a new selector then save it
 920         if( pNew )
 921         {
 922             OSL_ENSURE( (pRoot!=nullptr) == (pLast!=nullptr),
 923                     "Root-Selector, but no Last" );
 924             if( pLast )
 925                 pLast->SetNext( pNew );
 926             else
 927                 pRoot.reset(pNew);
 928
 929             pLast = pNew;
 930             pNew = nullptr;
 931         }
 932
 933         if( bNextToken && !bDone )
 934             m_nToken = GetNextToken();
 935     }
 936
 937     if( !pRoot )
 938     {
 939         // missing simple_selector
 940         return pRoot;
 941     }
 942
 943     // [ ':' pseudo_element ]?
 944     if( CSS1_COLON==m_nToken && IsParserWorking() )
 945     {
 946         // ':' pseudo element
 947         m_nToken = GetNextToken();
 948         if( CSS1_IDENT==m_nToken )
 949         {
 950             if (pLast)
 951                 pLast->SetNext( new CSS1Selector(CSS1_SELTYPE_PSEUDO,m_aToken) );
 952             m_nToken = GetNextToken();
 953         }
 954         else
 955         {
 956             // missing pseudo_element
 957             return pRoot;
 958         }
 959     }
 960
 961     return pRoot;
 962 }
 963
 964 // declaration
 965 //  : property ':' expr prio?
 966 //  | /* empty */
 967
 968 // expression
 969 //  : term [ operator term ]*
 970
 971 // term
 972 //  : unary_operator?
 973 //     [ NUMBER | STRING | PERCENTAGE | LENGTH | EMS | EXS | IDENT |
 974 //       HEXCOLOR | URL | RGB ]
 975
 976 // operator
 977 //  : '/' | ',' | /* empty */
 978
 979 // unary_operator
 980 //  : '-' | '+'
 981
 982 // property
 983 //  : ident
 984
 985 // the sign is only used for numeric values (except PERCENTAGE)
 986 // and it's applied on nValue!
 987 std::unique_ptr<CSS1Expression> CSS1Parser::ParseDeclaration( OUString& rProperty )
 988 {
 989     std::unique_ptr<CSS1Expression> pRoot;
 990     CSS1Expression *pLast = nullptr;
 991
 992     // property
 993     if( CSS1_IDENT != m_nToken )
 994     {
 995         // missing property
 996         return pRoot;
 997     }
 998     rProperty = m_aToken;
 999
1000     m_nToken = GetNextToken();
1001
1002     // ':'
1003     if( CSS1_COLON != m_nToken )
1004     {
1005         // missing ':'
1006         return pRoot;
1007     }
1008     m_nToken = GetNextToken();
1009
1010     // term [operator term]*
1011     // here we're pretty lax regarding the syntax, but this shouldn't
1012     // be a problem
1013     bool bDone = false;
1014     sal_Unicode cSign = 0, cOp = 0;
1015     CSS1Expression *pNew = nullptr;
1016
1017     LOOP_CHECK_DECL
1018
1019     while( !bDone && IsParserWorking() )
1020     {
1021         LOOP_CHECK_CHECK( "Infinite loop in ParseDeclaration()" )
1022
1023         switch( m_nToken )
1024         {
1025         case CSS1_MINUS:
1026             cSign = '-';
1027             break;
1028
1029         case CSS1_PLUS:
1030             cSign = '+';
1031             break;
1032
1033         case CSS1_NUMBER:
1034         case CSS1_LENGTH:
1035         case CSS1_PIXLENGTH:
1036         case CSS1_EMS:
1037         case CSS1_EMX:
1038             if( '-'==cSign )
1039                 m_nValue = -m_nValue;
1040             [[fallthrough]];
1041         case CSS1_STRING:
1042         case CSS1_PERCENTAGE:
1043         case CSS1_IDENT:
1044         case CSS1_URL:
1045         case CSS1_RGB:
1046         case CSS1_HEXCOLOR:
1047             pNew = new CSS1Expression( m_nToken, m_aToken, m_nValue, cOp );
1048             m_nValue = 0; // otherwise this also is applied to next ident
1049             cSign = 0;
1050             cOp = 0;
1051             break;
1052
1053         case CSS1_SLASH:
1054             cOp = '/';
1055             cSign = 0;
1056             break;
1057
1058         case CSS1_COMMA:
1059             cOp = ',';
1060             cSign = 0;
1061             break;
1062
1063         default:
1064             bDone = true;
1065             break;
1066         }
1067
1068         // if created a new expression save it
1069         if( pNew )
1070         {
1071             OSL_ENSURE( (pRoot!=nullptr) == (pLast!=nullptr),
1072                     "Root-Selector, but no Last" );
1073             if( pLast )
1074                 pLast->SetNext( pNew );
1075             else
1076                 pRoot.reset(pNew);
1077
1078             pLast = pNew;
1079             pNew = nullptr;
1080         }
1081
1082         if( !bDone )
1083             m_nToken = GetNextToken();
1084     }
1085
1086     if( !pRoot )
1087     {
1088         // missing term
1089         return pRoot;
1090     }
1091
1092     // prio?
1093     if( CSS1_IMPORTANT_SYM==m_nToken )
1094     {
1095         // IMPORTANT_SYM
1096         m_nToken = GetNextToken();
1097     }
1098
1099     return pRoot;
1100 }
1101
1102 CSS1Parser::CSS1Parser()
1103     : m_bWhiteSpace(false)
1104     , m_bEOF(false)
1105     , m_cNextCh(0)
1106     , m_nInPos(0)
1107     , m_nlLineNr(0)
1108     , m_nlLinePos(0)
1109     , m_nValue(0)
1110     , m_eState(CSS1_PAR_ACCEPTED)
1111     , m_nToken(CSS1_NULL)
1112 {
1113 }
1114
1115 CSS1Parser::~CSS1Parser()
1116 {
1117 }
1118
1119 void CSS1Parser::ParseStyleSheet( const OUString& rIn )
1120 {
1121     OUString aTmp( rIn );
1122
1123     sal_Unicode c;
1124     while( !aTmp.isEmpty() &&
1125            ( ' '==(c=aTmp[0]) || '\t'==c || '\r'==c || '\n'==c ) )
1126         aTmp = aTmp.copy( 1 );
1127
1128     while( !aTmp.isEmpty() && ( ' '==(c=aTmp[aTmp.getLength()-1])
1129            || '\t'==c || '\r'==c || '\n'==c ) )
1130         aTmp = aTmp.copy( 0, aTmp.getLength()-1 );
1131
1132     // remove SGML comments
1133     if( aTmp.getLength() >= 4 &&
1134         aTmp.startsWith( "<!--" ) )
1135         aTmp = aTmp.copy( 4 );
1136
1137     if( aTmp.getLength() >=3 &&
1138         aTmp.endsWith("-->") )
1139         aTmp = aTmp.copy( 0, aTmp.getLength() - 3 );
1140
1141     if( aTmp.isEmpty() )
1142         return;
1143
1144     InitRead( aTmp );
1145
1146     ParseStyleSheet();
1147 }
1148
1149 void CSS1Parser::ParseStyleOption( const OUString& rIn )
1150 {
1151     if( rIn.isEmpty() )
1152         return;
1153
1154     InitRead( rIn );
1155
1156     // fdo#41796: skip over spurious semicolons
1157     while (CSS1_SEMICOLON == m_nToken)
1158     {
1159         m_nToken = GetNextToken();
1160     }
1161
1162     OUString aProperty;
1163     std::unique_ptr<CSS1Expression> pExpr = ParseDeclaration( aProperty );
1164     if( !pExpr )
1165         return;
1166
1167     // process expression
1168     DeclarationParsed( aProperty, std::move(pExpr) );
1169
1170     LOOP_CHECK_DECL
1171
1172     // [ ';' declaration ]*
1173     while( CSS1_SEMICOLON==m_nToken && IsParserWorking() )
1174     {
1175         LOOP_CHECK_CHECK( "Infinite loop in ParseStyleOption()" )
1176
1177         m_nToken = GetNextToken();
1178         if( CSS1_IDENT==m_nToken )
1179         {
1180             std::unique_ptr<CSS1Expression> pExp = ParseDeclaration( aProperty );
1181             if( pExp )
1182             {
1183                 // process expression
1184                 DeclarationParsed( aProperty, std::move(pExp) );
1185             }
1186         }
1187     }
1188 }
1189
1190 void CSS1Parser::SelectorParsed( std::unique_ptr<CSS1Selector> /* pSelector */, bool /*bFirst*/ )
1191 {
1192 }
1193
1194 void CSS1Parser::DeclarationParsed( const OUString& /*rProperty*/,
1195                                     std::unique_ptr<CSS1Expression> /* pExpr */ )
1196 {
1197 }
1198
1199 CSS1Selector::~CSS1Selector()
1200 {
1201     delete m_pNext;
1202 }
1203
1204 CSS1Expression::~CSS1Expression()
1205 {
1206     delete pNext;
1207 }
1208
1209 void CSS1Expression::GetURL( OUString& rURL  ) const
1210 {
1211     OSL_ENSURE( CSS1_URL==eType, "CSS1-Expression is not URL" );
1212
1213     OSL_ENSURE( aValue.startsWithIgnoreAsciiCase( "url" ) &&
1214                 aValue.getLength() > 5 &&
1215                 '(' == aValue[3] &&
1216                 ')' == aValue[aValue.getLength()-1],
1217                 "no valid URL(...)" );
1218
1219     if( aValue.getLength() <= 5 )
1220         return;
1221
1222     rURL = aValue.copy( 4, aValue.getLength() - 5 );
1223
1224     // tdf#94088 original stripped only spaces, but there may also be
1225     // double quotes in CSS style URLs, so be prepared to spaces followed
1226     // by a single quote followed by spaces
1227     const sal_Unicode aSpace(' ');
1228     const sal_Unicode aSingleQuote('\'');
1229
1230     rURL = comphelper::string::strip(rURL, aSpace);
1231     rURL = comphelper::string::strip(rURL, aSingleQuote);
1232     rURL = comphelper::string::strip(rURL, aSpace);
1233 }
1234
1235 bool CSS1Expression::GetColor( Color &rColor ) const
1236 {
1237     OSL_ENSURE( CSS1_IDENT==eType || CSS1_RGB==eType ||
1238                 CSS1_HEXCOLOR==eType || CSS1_STRING==eType,
1239                 "CSS1-Expression cannot be colour" );
1240
1241     bool bRet = false;
1242     sal_uInt32 nColor = SAL_MAX_UINT32;
1243
1244     switch( eType )
1245     {
1246     case CSS1_RGB:
1247         {
1248             // fourth value to 255 means no alpha transparency
1249             // so the right by default value
1250             sal_uInt8 aColors[4] = { 0, 0, 0, 255 };
1251
1252             // it can be "rgb" or "rgba"
1253             if (!aValue.startsWithIgnoreAsciiCase( "rgb" ) || aValue.getLength() < 6 ||
1254                     (aValue[3] != '(' && aValue[4] != '(' ) || aValue[aValue.getLength()-1] != ')')
1255             {
1256                 break;
1257             }
1258
1259             sal_Int32 nPos = aValue.startsWithIgnoreAsciiCase( "rgba" )?5:4; // start after "rgba(" or "rgb("
1260             char cSep = (aValue.indexOf(',') != -1)?',':' ';
1261             // alpha value can be after a "/" or ","
1262             bool bIsSepAlphaDiv = (aValue.indexOf('/') != -1)?true:false;
1263             for ( int nCol = 0; nCol < 4 && nPos > 0; ++nCol )
1264             {
1265                 const std::u16string_view aNumber = o3tl::getToken(aValue, 0, cSep, nPos);
1266
1267                 sal_Int32 nNumber = o3tl::toInt32(aNumber);
1268                 if( nNumber<0 )
1269                 {
1270                     nNumber = 0;
1271                 }
1272                 else if( aNumber.find('%') != std::u16string_view::npos )
1273                 {
1274                     if( nNumber > 100 )
1275                         nNumber = 100;
1276                     nNumber *= 255;
1277                     nNumber /= 100;
1278                 }
1279                 else if( nNumber > 255 )
1280                     nNumber = 255;
1281                 else if( aNumber.find('.') != std::u16string_view::npos )
1282                 {
1283                     // in this case aNumber contains something like "0.3" so not an sal_Int32
1284                     nNumber = static_cast<sal_Int32>(255.0*o3tl::toDouble(aNumber));
1285                 }
1286                 aColors[nCol] = static_cast<sal_uInt8>(nNumber);
1287                 // rgb with alpha and '/' has this form: rgb(255 0 0 / 50%)
1288                 if (bIsSepAlphaDiv && nCol == 2)
1289                 {
1290                     // but there can be some spaces or not before and after the "/", so skip them
1291                     while (aValue[nPos] == '/' || aValue[nPos] == ' ')
1292                       ++nPos;
1293                 }
1294             }
1295
1296             rColor.SetRed( aColors[0] );
1297             rColor.SetGreen( aColors[1] );
1298             rColor.SetBlue( aColors[2] );
1299             rColor.SetAlpha( aColors[3] );
1300
1301             bRet = true;    // something different than a colour isn't possible
1302         }
1303         break;
1304
1305     case CSS1_IDENT:
1306     case CSS1_STRING:
1307         {
1308             OUString aTmp( aValue.toAsciiUpperCase() );
1309             nColor = GetHTMLColor( aTmp );
1310             bRet = nColor != SAL_MAX_UINT32;
1311         }
1312         if( bRet || CSS1_STRING != eType || aValue.isEmpty() ||
1313             aValue[0] != '#' )
1314             break;
1315         [[fallthrough]];
1316     case CSS1_HEXCOLOR:
1317         {
1318             // MS-IE hack: colour can also be a string
1319             sal_Int32 nOffset = CSS1_STRING==eType ? 1 : 0;
1320             bool bDouble = aValue.getLength()-nOffset == 3;
1321             sal_Int32 i = nOffset, nEnd = (bDouble ? 3 : 6) + nOffset;
1322
1323             nColor = 0;
1324             for( ; i<nEnd; i++ )
1325             {
1326                 sal_Unicode c = (i<aValue.getLength() ? aValue[i]
1327                                                          : '0' );
1328                 if( c >= '0' && c <= '9' )
1329                     c -= 48;
1330                 else if( c >= 'A' && c <= 'F' )
1331                     c -= 55;
1332                 else if( c >= 'a' && c <= 'f' )
1333                     c -= 87;
1334                 else
1335                     c = 16;
1336
1337                 nColor *= 16;
1338                 if( c<16 )
1339                     nColor += c;
1340                 if( bDouble )
1341                 {
1342                     nColor *= 16;
1343                     if( c<16 )
1344                         nColor += c;
1345                 }
1346             }
1347             bRet = true;
1348         }
1349         break;
1350     default:
1351         ;
1352     }
1353
1354     if( bRet && nColor!=SAL_MAX_UINT32 )
1355     {
1356         rColor.SetRed( static_cast<sal_uInt8>((nColor & 0x00ff0000UL) >> 16) );
1357         rColor.SetGreen( static_cast<sal_uInt8>((nColor & 0x0000ff00UL) >> 8) );
1358         rColor.SetBlue( static_cast<sal_uInt8>(nColor & 0x000000ffUL) );
1359     }
1360
1361     return bRet;
1362 }
1363
1364 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */