remove assert looking for new compatibilityMode DOCX
[LibreOffice.git] / sw / source / filter / html / parcss1.cxx
bloba32de72ab52f674993569312b3625e4d3348fb5a
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <o3tl/string_view.hxx>
21 #include <o3tl/unit_conversion.hxx>
22 #include <osl/diagnose.h>
23 #include <rtl/character.hxx>
24 #include <rtl/ustrbuf.hxx>
25 #include <tools/color.hxx>
26 #include <tools/solar.h>
27 #include <svtools/htmltokn.h>
28 #include <comphelper/string.hxx>
29 #include "parcss1.hxx"
31 // Loop-Check: Used to avoid infinite loops, is checked after every
32 // loop, if there is progress of the input position
33 #define LOOP_CHECK
35 #ifdef LOOP_CHECK
37 #define LOOP_CHECK_DECL \
38 sal_Int32 nOldInPos = SAL_MAX_INT32;
39 #define LOOP_CHECK_RESTART \
40 nOldInPos = SAL_MAX_INT32;
41 #define LOOP_CHECK_CHECK( where ) \
42 OSL_ENSURE( nOldInPos!=m_nInPos || m_cNextCh==sal_Unicode(EOF), where ); \
43 if( nOldInPos==m_nInPos && m_cNextCh!=sal_Unicode(EOF) ) \
44 break; \
45 else \
46 nOldInPos = m_nInPos;
48 #else
50 #define LOOP_CHECK_DECL
51 #define LOOP_CHECK_RESTART
52 #define LOOP_CHECK_CHECK( where )
54 #endif
56 const sal_Int32 MAX_LEN = 1024;
58 void CSS1Parser::InitRead( const OUString& rIn )
60 m_nlLineNr = 0;
61 m_nlLinePos = 0;
63 m_bWhiteSpace = true; // if nothing was read it's like there was WS
64 m_bEOF = false;
65 m_eState = CSS1_PAR_WORKING;
66 m_nValue = 0.;
68 m_aIn = rIn;
69 m_nInPos = 0;
70 m_cNextCh = GetNextChar();
71 m_nToken = GetNextToken();
74 sal_Unicode CSS1Parser::GetNextChar()
76 if( m_nInPos >= m_aIn.getLength() )
78 m_bEOF = true;
79 return sal_Unicode(EOF);
82 sal_Unicode c = m_aIn[m_nInPos];
83 m_nInPos++;
85 if( c == '\n' )
87 ++m_nlLineNr;
88 m_nlLinePos = 1;
90 else
91 ++m_nlLinePos;
93 return c;
96 // This function implements the scanner described in
98 // http://www.w3.org/pub/WWW/TR/WD-css1.html
99 // resp. http://www.w3.org/pub/WWW/TR/WD-css1-960220.html
101 // for CSS1. It's a direct implementation of the
102 // described Lex grammar.
104 CSS1Token CSS1Parser::GetNextToken()
106 CSS1Token nRet = CSS1_NULL;
107 m_aToken.clear();
109 do {
110 // remember if white space was read
111 bool bPrevWhiteSpace = m_bWhiteSpace;
112 m_bWhiteSpace = false;
114 bool bNextCh = true;
115 switch( m_cNextCh )
117 case '/': // COMMENT | '/'
119 m_cNextCh = GetNextChar();
120 if( '*' == m_cNextCh )
122 // COMMENT
123 m_cNextCh = GetNextChar();
125 bool bAsterisk = false;
126 while( !(bAsterisk && '/'==m_cNextCh) && !IsEOF() )
128 bAsterisk = ('*'==m_cNextCh);
129 m_cNextCh = GetNextChar();
132 else
134 // '/'
135 bNextCh = false;
136 nRet = CSS1_SLASH;
139 break;
141 case '@': // '@import' | '@XXX'
143 m_cNextCh = GetNextChar();
144 if (rtl::isAsciiAlpha(m_cNextCh))
146 // scan the next identifier
147 OUStringBuffer sTmpBuffer(32);
148 do {
149 sTmpBuffer.append( m_cNextCh );
150 m_cNextCh = GetNextChar();
151 } while( (rtl::isAsciiAlphanumeric(m_cNextCh) ||
152 '-' == m_cNextCh) && !IsEOF() );
154 m_aToken += sTmpBuffer;
156 // check if we know it
157 switch( m_aToken[0] )
159 case 'i':
160 case 'I':
161 if( m_aToken.equalsIgnoreAsciiCase( "import" ) )
162 nRet = CSS1_IMPORT_SYM;
163 break;
164 case 'p':
165 case 'P':
166 if( m_aToken.equalsIgnoreAsciiCase( "page" ) )
167 nRet = CSS1_PAGE_SYM;
168 break;
171 // error handling: ignore '@indent' and the rest until
172 // semicolon at end of the next block
173 if( CSS1_NULL==nRet )
175 m_aToken.clear();
176 int nBlockLvl = 0;
177 sal_Unicode cQuoteCh = 0;
178 bool bDone = false, bEscape = false;
179 while( !bDone && !IsEOF() )
181 bool bOldEscape = bEscape;
182 bEscape = false;
183 switch( m_cNextCh )
185 case '{':
186 if( !cQuoteCh && !bOldEscape )
187 nBlockLvl++;
188 break;
189 case ';':
190 if( !cQuoteCh && !bOldEscape )
191 bDone = nBlockLvl==0;
192 break;
193 case '}':
194 if( !cQuoteCh && !bOldEscape )
195 bDone = --nBlockLvl==0;
196 break;
197 case '\"':
198 case '\'':
199 if( !bOldEscape )
201 if( cQuoteCh )
203 if( cQuoteCh == m_cNextCh )
204 cQuoteCh = 0;
206 else
208 cQuoteCh = m_cNextCh;
211 break;
212 case '\\':
213 if( !bOldEscape )
214 bEscape = true;
215 break;
217 m_cNextCh = GetNextChar();
221 bNextCh = false;
224 break;
226 case '!': // '!' 'legal' | '!' 'important' | syntax error
228 // ignore white space
229 m_cNextCh = GetNextChar();
230 while( ( ' ' == m_cNextCh ||
231 (m_cNextCh >= 0x09 && m_cNextCh <= 0x0d) ) && !IsEOF() )
233 m_bWhiteSpace = true;
234 m_cNextCh = GetNextChar();
237 if( 'i'==m_cNextCh || 'I'==m_cNextCh)
239 // scan next identifier
240 OUStringBuffer sTmpBuffer(32);
241 do {
242 sTmpBuffer.append( m_cNextCh );
243 m_cNextCh = GetNextChar();
244 } while( (rtl::isAsciiAlphanumeric(m_cNextCh) ||
245 '-' == m_cNextCh) && !IsEOF() );
247 m_aToken += sTmpBuffer;
249 if( ( 'i'==m_aToken[0] || 'I'==m_aToken[0] ) &&
250 m_aToken.equalsIgnoreAsciiCase( "important" ) )
252 // '!' 'important'
253 nRet = CSS1_IMPORTANT_SYM;
255 else
257 // error handling: ignore '!', not IDENT
258 nRet = CSS1_IDENT;
261 m_bWhiteSpace = false;
262 bNextCh = false;
264 else
266 // error handling: ignore '!'
267 bNextCh = false;
270 break;
272 case '\"':
273 case '\'': // STRING
275 // \... isn't possible yet!!!
276 sal_Unicode cQuoteChar = m_cNextCh;
277 m_cNextCh = GetNextChar();
279 OUStringBuffer sTmpBuffer( MAX_LEN );
280 do {
281 sTmpBuffer.append( m_cNextCh );
282 m_cNextCh = GetNextChar();
283 } while( cQuoteChar != m_cNextCh && !IsEOF() );
285 m_aToken += sTmpBuffer;
287 nRet = CSS1_STRING;
289 break;
291 case '0':
292 case '1':
293 case '2':
294 case '3':
295 case '4':
296 case '5':
297 case '6':
298 case '7':
299 case '8':
300 case '9': // NUMBER | PERCENTAGE | LENGTH
302 // save current position
303 std::size_t nInPosSave = m_nInPos;
304 sal_Unicode cNextChSave = m_cNextCh;
305 sal_uInt32 nlLineNrSave = m_nlLineNr;
306 sal_uInt32 nlLinePosSave = m_nlLinePos;
307 bool bEOFSave = m_bEOF;
309 // first try to parse a hex digit
310 OUStringBuffer sTmpBuffer( 16 );
311 do {
312 sTmpBuffer.append( m_cNextCh );
313 m_cNextCh = GetNextChar();
314 } while( sTmpBuffer.getLength() < 7 &&
315 ( ('0'<=m_cNextCh && '9'>=m_cNextCh) ||
316 ('A'<=m_cNextCh && 'F'>=m_cNextCh) ||
317 ('a'<=m_cNextCh && 'f'>=m_cNextCh) ) &&
318 !IsEOF() );
320 if( sTmpBuffer.getLength()==6 )
322 // we found a color in hex
323 m_aToken += sTmpBuffer;
324 nRet = CSS1_HEXCOLOR;
325 bNextCh = false;
327 break;
330 // otherwise we try a number
331 m_nInPos = nInPosSave;
332 m_cNextCh = cNextChSave;
333 m_nlLineNr = nlLineNrSave;
334 m_nlLinePos = nlLinePosSave;
335 m_bEOF = bEOFSave;
337 // first parse the number
338 sTmpBuffer.setLength( 0 );
339 do {
340 sTmpBuffer.append( m_cNextCh );
341 m_cNextCh = GetNextChar();
342 } while( (('0'<=m_cNextCh && '9'>=m_cNextCh) || '.'==m_cNextCh) &&
343 !IsEOF() );
345 m_aToken += sTmpBuffer;
346 m_nValue = m_aToken.toDouble();
348 // ignore white space
349 while( ( ' ' == m_cNextCh ||
350 (m_cNextCh >= 0x09 && m_cNextCh <= 0x0d) ) && !IsEOF() )
352 m_bWhiteSpace = true;
353 m_cNextCh = GetNextChar();
356 // check now, of there is a unit
357 switch( m_cNextCh )
359 case '%': // PERCENTAGE
360 m_bWhiteSpace = false;
361 nRet = CSS1_PERCENTAGE;
362 break;
364 case 'c':
365 case 'C': // LENGTH cm | LENGTH IDENT
366 case 'e':
367 case 'E': // LENGTH (em | ex) | LENGTH IDENT
368 case 'i':
369 case 'I': // LENGTH inch | LENGTH IDENT
370 case 'p':
371 case 'P': // LENGTH (pt | px | pc) | LENGTH IDENT
372 case 'm':
373 case 'M': // LENGTH mm | LENGTH IDENT
375 // save current position
376 sal_Int32 nInPosOld = m_nInPos;
377 sal_Unicode cNextChOld = m_cNextCh;
378 sal_uInt32 nlLineNrOld = m_nlLineNr;
379 sal_uInt32 nlLinePosOld = m_nlLinePos;
380 bool bEOFOld = m_bEOF;
382 // parse the next identifier
383 OUStringBuffer sTmpBuffer2(64);
384 do {
385 sTmpBuffer2.append(static_cast<sal_Unicode>(rtl::toAsciiLowerCase(m_cNextCh)));
386 m_cNextCh = GetNextChar();
387 } while( (rtl::isAsciiAlphanumeric(m_cNextCh) ||
388 '-' == m_cNextCh) && !IsEOF() );
390 OUString aIdent = sTmpBuffer2.makeStringAndClear();
391 nRet = CSS1_NUMBER;
393 // Is it a unit?
394 switch( aIdent[0] )
396 case 'c':
397 if (aIdent == "cm")
399 m_nValue = o3tl::convert(m_nValue, o3tl::Length::cm, o3tl::Length::twip);
400 nRet = CSS1_LENGTH;
402 break;
403 case 'e':
404 if (aIdent == "em")
405 nRet = CSS1_EMS;
406 else if (aIdent == "ex")
407 nRet = CSS1_EMX;
408 break;
409 case 'i':
410 if (aIdent == "in")
412 nRet = CSS1_LENGTH;
413 m_nValue = o3tl::convert(m_nValue, o3tl::Length::in, o3tl::Length::twip);
415 break;
416 case 'm':
417 if (aIdent == "mm")
419 nRet = CSS1_LENGTH;
420 m_nValue = o3tl::convert(m_nValue, o3tl::Length::mm, o3tl::Length::twip);
422 break;
423 case 'p':
424 if (aIdent == "pt")
426 nRet = CSS1_LENGTH;
427 m_nValue = o3tl::convert(m_nValue, o3tl::Length::pt, o3tl::Length::twip);
429 else if (aIdent == "pc")
431 nRet = CSS1_LENGTH;
432 m_nValue = o3tl::convert(m_nValue, o3tl::Length::pc, o3tl::Length::twip);
434 else if (aIdent == "px")
435 nRet = CSS1_PIXLENGTH;
436 break;
439 if( nRet == CSS1_NUMBER )
441 m_nInPos = nInPosOld;
442 m_cNextCh = cNextChOld;
443 m_nlLineNr = nlLineNrOld;
444 m_nlLinePos = nlLinePosOld;
445 m_bEOF = bEOFOld;
447 else
449 m_bWhiteSpace = false;
451 bNextCh = false;
453 break;
454 default: // NUMBER IDENT
455 bNextCh = false;
456 nRet = CSS1_NUMBER;
457 break;
460 break;
462 case ':': // ':'
463 // catch link/visited/active !!!
464 nRet = CSS1_COLON;
465 break;
467 case '.': // DOT_W_WS | DOT_WO_WS
468 nRet = bPrevWhiteSpace ? CSS1_DOT_W_WS : CSS1_DOT_WO_WS;
469 break;
471 case '+': // '+'
472 nRet = CSS1_PLUS;
473 break;
475 case '-': // '-'
476 nRet = CSS1_MINUS;
477 break;
479 case '{': // '{'
480 nRet = CSS1_OBRACE;
481 break;
483 case '}': // '}'
484 nRet = CSS1_CBRACE;
485 break;
487 case ';': // ';'
488 nRet = CSS1_SEMICOLON;
489 break;
491 case ',': // ','
492 nRet = CSS1_COMMA;
493 break;
495 case '#': // '#'
496 m_cNextCh = GetNextChar();
497 if( ('0'<=m_cNextCh && '9'>=m_cNextCh) ||
498 ('a'<=m_cNextCh && 'f'>=m_cNextCh) ||
499 ('A'<=m_cNextCh && 'F'>=m_cNextCh) )
501 // save current position
502 sal_Int32 nInPosSave = m_nInPos;
503 sal_Unicode cNextChSave = m_cNextCh;
504 sal_uInt32 nlLineNrSave = m_nlLineNr;
505 sal_uInt32 nlLinePosSave = m_nlLinePos;
506 bool bEOFSave = m_bEOF;
508 // first try to parse a hex digit
509 OUStringBuffer sTmpBuffer(8);
510 do {
511 sTmpBuffer.append( m_cNextCh );
512 m_cNextCh = GetNextChar();
513 } while( sTmpBuffer.getLength() < 9 &&
514 ( ('0'<=m_cNextCh && '9'>=m_cNextCh) ||
515 ('A'<=m_cNextCh && 'F'>=m_cNextCh) ||
516 ('a'<=m_cNextCh && 'f'>=m_cNextCh) ) &&
517 !IsEOF() );
519 if( sTmpBuffer.getLength()==6 || sTmpBuffer.getLength()==3 )
521 // we found a color in hex (RGB)
522 m_aToken += sTmpBuffer;
523 nRet = CSS1_HEXCOLOR;
524 bNextCh = false;
526 break;
529 if( sTmpBuffer.getLength()==8 )
531 // we found a color in hex (RGBA)
532 // we convert it to RGB assuming white background
533 sal_uInt32 nColor = sTmpBuffer.makeStringAndClear().toUInt32(16);
534 sal_uInt32 nRed = (nColor & 0xff000000) >> 24;
535 sal_uInt32 nGreen = (nColor & 0xff0000) >> 16;
536 sal_uInt32 nBlue = (nColor & 0xff00) >> 8;
537 double nAlpha = (nColor & 0xff) / 255.0;
538 nRed = (1 - nAlpha) * 255 + nAlpha * nRed;
539 nGreen = (1 - nAlpha) * 255 + nAlpha * nGreen;
540 nBlue = (1 - nAlpha) * 255 + nAlpha * nBlue;
541 nColor = (nRed << 16) + (nGreen << 8) + nBlue;
542 m_aToken += OUString::number(nColor, 16);
543 nRet = CSS1_HEXCOLOR;
544 bNextCh = false;
546 break;
549 // otherwise we try a number
550 m_nInPos = nInPosSave;
551 m_cNextCh = cNextChSave;
552 m_nlLineNr = nlLineNrSave;
553 m_nlLinePos = nlLinePosSave;
554 m_bEOF = bEOFSave;
557 nRet = CSS1_HASH;
558 bNextCh = false;
559 break;
561 case ' ':
562 case '\t':
563 case '\r':
564 case '\n': // White-Space
565 m_bWhiteSpace = true;
566 break;
568 case sal_Unicode(EOF):
569 if( IsEOF() )
571 m_eState = CSS1_PAR_ACCEPTED;
572 bNextCh = false;
573 break;
575 [[fallthrough]];
577 default: // IDENT | syntax error
578 if (rtl::isAsciiAlpha(m_cNextCh))
580 // IDENT
582 bool bHexColor = true;
584 // parse the next identifier
585 OUStringBuffer sTmpBuffer(64);
586 do {
587 sTmpBuffer.append( m_cNextCh );
588 if( bHexColor )
590 bHexColor = sTmpBuffer.getLength()<7 &&
591 ( ('0'<=m_cNextCh && '9'>=m_cNextCh) ||
592 ('A'<=m_cNextCh && 'F'>=m_cNextCh) ||
593 ('a'<=m_cNextCh && 'f'>=m_cNextCh) );
595 m_cNextCh = GetNextChar();
596 } while( (rtl::isAsciiAlphanumeric(m_cNextCh) ||
597 '-' == m_cNextCh) && !IsEOF() );
599 m_aToken += sTmpBuffer;
601 if( bHexColor && sTmpBuffer.getLength()==6 )
603 bNextCh = false;
604 nRet = CSS1_HEXCOLOR;
606 break;
608 if( '('==m_cNextCh &&
609 ( (('u'==m_aToken[0] || 'U'==m_aToken[0]) &&
610 m_aToken.equalsIgnoreAsciiCase( "url" )) ||
611 (('r'==m_aToken[0] || 'R'==m_aToken[0]) &&
612 (m_aToken.equalsIgnoreAsciiCase( "rgb" ) || m_aToken.equalsIgnoreAsciiCase( "rgba" ) )
613 ) ) )
615 int nNestCnt = 0;
616 OUStringBuffer sTmpBuffer2(64);
617 do {
618 sTmpBuffer2.append( m_cNextCh );
619 switch( m_cNextCh )
621 case '(': nNestCnt++; break;
622 case ')': nNestCnt--; break;
624 m_cNextCh = GetNextChar();
625 } while( (nNestCnt>1 || ')'!=m_cNextCh) && !IsEOF() );
626 sTmpBuffer2.append( m_cNextCh );
627 m_aToken += sTmpBuffer2;
628 bNextCh = true;
629 nRet = 'u'==m_aToken[0] || 'U'==m_aToken[0]
630 ? CSS1_URL
631 : CSS1_RGB;
633 else
635 bNextCh = false;
636 nRet = CSS1_IDENT;
639 // error handling: ignore digit
640 break;
642 if( bNextCh )
643 m_cNextCh = GetNextChar();
645 } while( CSS1_NULL==nRet && IsParserWorking() );
647 return nRet;
650 // These functions implement the parser described in
652 // http://www.w3.org/pub/WWW/TR/WD-css1.html
653 // resp. http://www.w3.org/pub/WWW/TR/WD-css1-960220.html
655 // for CSS1. It's a direct implementation of the
656 // described Lex grammar.
658 // stylesheet
659 // : import* rule*
661 // import
662 // : IMPORT_SYM url
664 // url
665 // : STRING
667 void CSS1Parser::ParseStyleSheet()
669 LOOP_CHECK_DECL
671 // import*
672 bool bDone = false;
673 while( !bDone && IsParserWorking() )
675 LOOP_CHECK_CHECK( "Infinite loop in ParseStyleSheet()/import *" )
677 switch( m_nToken )
679 case CSS1_IMPORT_SYM:
680 // IMPORT_SYM url
681 // URL are skipped without checks
682 m_nToken = GetNextToken();
683 break;
684 case CSS1_IDENT: // Look-Aheads
685 case CSS1_DOT_W_WS:
686 case CSS1_HASH:
687 case CSS1_PAGE_SYM:
688 // rule
689 bDone = true;
690 break;
691 default:
692 // error handling: ignore
693 break;
696 if( !bDone )
697 m_nToken = GetNextToken();
700 LOOP_CHECK_RESTART
702 // rule *
703 while( IsParserWorking() )
705 LOOP_CHECK_CHECK( "Infinite loop in ParseStyleSheet()/rule *" )
707 switch( m_nToken )
709 case CSS1_IDENT: // Look-Aheads
710 case CSS1_DOT_W_WS:
711 case CSS1_HASH:
712 case CSS1_PAGE_SYM:
713 // rule
714 ParseRule();
715 break;
716 default:
717 // error handling: ignore
718 m_nToken = GetNextToken();
719 break;
724 // rule
725 // : selector [ ',' selector ]*
726 // '{' declaration [ ';' declaration ]* '}'
728 void CSS1Parser::ParseRule()
730 // selector
731 std::unique_ptr<CSS1Selector> pSelector = ParseSelector();
732 if( !pSelector )
733 return;
735 // process selector
736 SelectorParsed( std::move(pSelector), true );
738 LOOP_CHECK_DECL
740 // [ ',' selector ]*
741 while( CSS1_COMMA==m_nToken && IsParserWorking() )
743 LOOP_CHECK_CHECK( "Infinite loop in ParseRule()/selector *" )
745 // ignore ','
746 m_nToken = GetNextToken();
748 // selector
749 pSelector = ParseSelector();
750 if( !pSelector )
751 return;
753 // process selector
754 SelectorParsed( std::move(pSelector), false );
757 // '{'
758 if( CSS1_OBRACE != m_nToken )
759 return;
760 m_nToken = GetNextToken();
762 // declaration
763 OUString aProperty;
764 std::unique_ptr<CSS1Expression> pExpr = ParseDeclaration( aProperty );
765 if( !pExpr )
766 return;
768 // process expression
769 DeclarationParsed( aProperty, std::move(pExpr) );
771 LOOP_CHECK_RESTART
773 // [ ';' declaration ]*
774 while( CSS1_SEMICOLON==m_nToken && IsParserWorking() )
776 LOOP_CHECK_CHECK( "Infinite loop in ParseRule()/declaration *" )
778 // ';'
779 m_nToken = GetNextToken();
781 // declaration
782 if( CSS1_IDENT == m_nToken )
784 std::unique_ptr<CSS1Expression> pExp = ParseDeclaration( aProperty );
785 if( pExp )
787 // process expression
788 DeclarationParsed( aProperty, std::move(pExp));
793 // '}'
794 if( CSS1_CBRACE == m_nToken )
795 m_nToken = GetNextToken();
798 // selector
799 // : simple_selector+ [ ':' pseudo_element ]?
801 // simple_selector
802 // : element_name [ DOT_WO_WS class ]?
803 // | DOT_W_WS class
804 // | id_selector
806 // element_name
807 // : IDENT
809 // class
810 // : IDENT
812 // id_selector
813 // : '#' IDENT
815 // pseudo_element
816 // : IDENT
818 std::unique_ptr<CSS1Selector> CSS1Parser::ParseSelector()
820 std::unique_ptr<CSS1Selector> pRoot;
821 CSS1Selector *pLast = nullptr;
823 bool bDone = false;
824 CSS1Selector *pNew = nullptr;
826 LOOP_CHECK_DECL
828 // simple_selector+
829 while( !bDone && IsParserWorking() )
831 LOOP_CHECK_CHECK( "Infinite loop in ParseSelector()" )
833 bool bNextToken = true;
835 switch( m_nToken )
837 case CSS1_IDENT:
839 // element_name [ DOT_WO_WS class ]?
841 // element_name
842 OUString aElement = m_aToken;
843 CSS1SelectorType eType = CSS1_SELTYPE_ELEMENT;
844 m_nToken = GetNextToken();
846 if( CSS1_DOT_WO_WS == m_nToken )
848 // DOT_WO_WS
849 m_nToken = GetNextToken();
851 // class
852 if( CSS1_IDENT == m_nToken )
854 aElement += "." + m_aToken;
855 eType = CSS1_SELTYPE_ELEM_CLASS;
857 else
859 // missing class
860 return pRoot;
863 else
865 // that was a look-ahead
866 bNextToken = false;
868 pNew = new CSS1Selector( eType, aElement );
870 break;
871 case CSS1_DOT_W_WS:
872 // DOT_W_WS class
874 // DOT_W_WS
875 m_nToken = GetNextToken();
877 if( CSS1_IDENT==m_nToken )
879 // class
880 pNew = new CSS1Selector( CSS1_SELTYPE_CLASS, m_aToken );
882 else
884 // missing class
885 return pRoot;
887 break;
888 case CSS1_HASH:
889 // '#' id_selector
891 // '#'
892 m_nToken = GetNextToken();
894 if( CSS1_IDENT==m_nToken )
896 // id_selector
897 pNew = new CSS1Selector( CSS1_SELTYPE_ID, m_aToken );
899 else
901 // missing id_selector
902 return pRoot;
904 break;
906 case CSS1_PAGE_SYM:
908 // @page
909 pNew = new CSS1Selector( CSS1_SELTYPE_PAGE, m_aToken );
911 break;
913 default:
914 // stop because we don't know what's next
915 bDone = true;
916 break;
919 // if created a new selector then save it
920 if( pNew )
922 OSL_ENSURE( (pRoot!=nullptr) == (pLast!=nullptr),
923 "Root-Selector, but no Last" );
924 if( pLast )
925 pLast->SetNext( pNew );
926 else
927 pRoot.reset(pNew);
929 pLast = pNew;
930 pNew = nullptr;
933 if( bNextToken && !bDone )
934 m_nToken = GetNextToken();
937 if( !pRoot )
939 // missing simple_selector
940 return pRoot;
943 // [ ':' pseudo_element ]?
944 if( CSS1_COLON==m_nToken && IsParserWorking() )
946 // ':' pseudo element
947 m_nToken = GetNextToken();
948 if( CSS1_IDENT==m_nToken )
950 if (pLast)
951 pLast->SetNext( new CSS1Selector(CSS1_SELTYPE_PSEUDO,m_aToken) );
952 m_nToken = GetNextToken();
954 else
956 // missing pseudo_element
957 return pRoot;
961 return pRoot;
964 // declaration
965 // : property ':' expr prio?
966 // | /* empty */
968 // expression
969 // : term [ operator term ]*
971 // term
972 // : unary_operator?
973 // [ NUMBER | STRING | PERCENTAGE | LENGTH | EMS | EXS | IDENT |
974 // HEXCOLOR | URL | RGB ]
976 // operator
977 // : '/' | ',' | /* empty */
979 // unary_operator
980 // : '-' | '+'
982 // property
983 // : ident
985 // the sign is only used for numeric values (except PERCENTAGE)
986 // and it's applied on nValue!
987 std::unique_ptr<CSS1Expression> CSS1Parser::ParseDeclaration( OUString& rProperty )
989 std::unique_ptr<CSS1Expression> pRoot;
990 CSS1Expression *pLast = nullptr;
992 // property
993 if( CSS1_IDENT != m_nToken )
995 // missing property
996 return pRoot;
998 rProperty = m_aToken;
1000 m_nToken = GetNextToken();
1002 // ':'
1003 if( CSS1_COLON != m_nToken )
1005 // missing ':'
1006 return pRoot;
1008 m_nToken = GetNextToken();
1010 // term [operator term]*
1011 // here we're pretty lax regarding the syntax, but this shouldn't
1012 // be a problem
1013 bool bDone = false;
1014 sal_Unicode cSign = 0, cOp = 0;
1015 CSS1Expression *pNew = nullptr;
1017 LOOP_CHECK_DECL
1019 while( !bDone && IsParserWorking() )
1021 LOOP_CHECK_CHECK( "Infinite loop in ParseDeclaration()" )
1023 switch( m_nToken )
1025 case CSS1_MINUS:
1026 cSign = '-';
1027 break;
1029 case CSS1_PLUS:
1030 cSign = '+';
1031 break;
1033 case CSS1_NUMBER:
1034 case CSS1_LENGTH:
1035 case CSS1_PIXLENGTH:
1036 case CSS1_EMS:
1037 case CSS1_EMX:
1038 if( '-'==cSign )
1039 m_nValue = -m_nValue;
1040 [[fallthrough]];
1041 case CSS1_STRING:
1042 case CSS1_PERCENTAGE:
1043 case CSS1_IDENT:
1044 case CSS1_URL:
1045 case CSS1_RGB:
1046 case CSS1_HEXCOLOR:
1047 pNew = new CSS1Expression( m_nToken, m_aToken, m_nValue, cOp );
1048 m_nValue = 0; // otherwise this also is applied to next ident
1049 cSign = 0;
1050 cOp = 0;
1051 break;
1053 case CSS1_SLASH:
1054 cOp = '/';
1055 cSign = 0;
1056 break;
1058 case CSS1_COMMA:
1059 cOp = ',';
1060 cSign = 0;
1061 break;
1063 default:
1064 bDone = true;
1065 break;
1068 // if created a new expression save it
1069 if( pNew )
1071 OSL_ENSURE( (pRoot!=nullptr) == (pLast!=nullptr),
1072 "Root-Selector, but no Last" );
1073 if( pLast )
1074 pLast->SetNext( pNew );
1075 else
1076 pRoot.reset(pNew);
1078 pLast = pNew;
1079 pNew = nullptr;
1082 if( !bDone )
1083 m_nToken = GetNextToken();
1086 if( !pRoot )
1088 // missing term
1089 return pRoot;
1092 // prio?
1093 if( CSS1_IMPORTANT_SYM==m_nToken )
1095 // IMPORTANT_SYM
1096 m_nToken = GetNextToken();
1099 return pRoot;
1102 CSS1Parser::CSS1Parser()
1103 : m_bWhiteSpace(false)
1104 , m_bEOF(false)
1105 , m_cNextCh(0)
1106 , m_nInPos(0)
1107 , m_nlLineNr(0)
1108 , m_nlLinePos(0)
1109 , m_nValue(0)
1110 , m_eState(CSS1_PAR_ACCEPTED)
1111 , m_nToken(CSS1_NULL)
1115 CSS1Parser::~CSS1Parser()
1119 void CSS1Parser::ParseStyleSheet( const OUString& rIn )
1121 OUString aTmp( rIn );
1123 sal_Unicode c;
1124 while( !aTmp.isEmpty() &&
1125 ( ' '==(c=aTmp[0]) || '\t'==c || '\r'==c || '\n'==c ) )
1126 aTmp = aTmp.copy( 1 );
1128 while( !aTmp.isEmpty() && ( ' '==(c=aTmp[aTmp.getLength()-1])
1129 || '\t'==c || '\r'==c || '\n'==c ) )
1130 aTmp = aTmp.copy( 0, aTmp.getLength()-1 );
1132 // remove SGML comments
1133 if( aTmp.getLength() >= 4 &&
1134 aTmp.startsWith( "<!--" ) )
1135 aTmp = aTmp.copy( 4 );
1137 if( aTmp.getLength() >=3 &&
1138 aTmp.endsWith("-->") )
1139 aTmp = aTmp.copy( 0, aTmp.getLength() - 3 );
1141 if( aTmp.isEmpty() )
1142 return;
1144 InitRead( aTmp );
1146 ParseStyleSheet();
1149 void CSS1Parser::ParseStyleOption( const OUString& rIn )
1151 if( rIn.isEmpty() )
1152 return;
1154 InitRead( rIn );
1156 // fdo#41796: skip over spurious semicolons
1157 while (CSS1_SEMICOLON == m_nToken)
1159 m_nToken = GetNextToken();
1162 OUString aProperty;
1163 std::unique_ptr<CSS1Expression> pExpr = ParseDeclaration( aProperty );
1164 if( !pExpr )
1165 return;
1167 // process expression
1168 DeclarationParsed( aProperty, std::move(pExpr) );
1170 LOOP_CHECK_DECL
1172 // [ ';' declaration ]*
1173 while( CSS1_SEMICOLON==m_nToken && IsParserWorking() )
1175 LOOP_CHECK_CHECK( "Infinite loop in ParseStyleOption()" )
1177 m_nToken = GetNextToken();
1178 if( CSS1_IDENT==m_nToken )
1180 std::unique_ptr<CSS1Expression> pExp = ParseDeclaration( aProperty );
1181 if( pExp )
1183 // process expression
1184 DeclarationParsed( aProperty, std::move(pExp) );
1190 void CSS1Parser::SelectorParsed( std::unique_ptr<CSS1Selector> /* pSelector */, bool /*bFirst*/ )
1194 void CSS1Parser::DeclarationParsed( const OUString& /*rProperty*/,
1195 std::unique_ptr<CSS1Expression> /* pExpr */ )
1199 CSS1Selector::~CSS1Selector()
1201 delete m_pNext;
1204 CSS1Expression::~CSS1Expression()
1206 delete pNext;
1209 void CSS1Expression::GetURL( OUString& rURL ) const
1211 OSL_ENSURE( CSS1_URL==eType, "CSS1-Expression is not URL" );
1213 OSL_ENSURE( aValue.startsWithIgnoreAsciiCase( "url" ) &&
1214 aValue.getLength() > 5 &&
1215 '(' == aValue[3] &&
1216 ')' == aValue[aValue.getLength()-1],
1217 "no valid URL(...)" );
1219 if( aValue.getLength() <= 5 )
1220 return;
1222 rURL = aValue.copy( 4, aValue.getLength() - 5 );
1224 // tdf#94088 original stripped only spaces, but there may also be
1225 // double quotes in CSS style URLs, so be prepared to spaces followed
1226 // by a single quote followed by spaces
1227 const sal_Unicode aSpace(' ');
1228 const sal_Unicode aSingleQuote('\'');
1230 rURL = comphelper::string::strip(rURL, aSpace);
1231 rURL = comphelper::string::strip(rURL, aSingleQuote);
1232 rURL = comphelper::string::strip(rURL, aSpace);
1235 bool CSS1Expression::GetColor( Color &rColor ) const
1237 OSL_ENSURE( CSS1_IDENT==eType || CSS1_RGB==eType ||
1238 CSS1_HEXCOLOR==eType || CSS1_STRING==eType,
1239 "CSS1-Expression cannot be colour" );
1241 bool bRet = false;
1242 sal_uInt32 nColor = SAL_MAX_UINT32;
1244 switch( eType )
1246 case CSS1_RGB:
1248 // fourth value to 255 means no alpha transparency
1249 // so the right by default value
1250 sal_uInt8 aColors[4] = { 0, 0, 0, 255 };
1252 // it can be "rgb" or "rgba"
1253 if (!aValue.startsWithIgnoreAsciiCase( "rgb" ) || aValue.getLength() < 6 ||
1254 (aValue[3] != '(' && aValue[4] != '(' ) || aValue[aValue.getLength()-1] != ')')
1256 break;
1259 sal_Int32 nPos = aValue.startsWithIgnoreAsciiCase( "rgba" )?5:4; // start after "rgba(" or "rgb("
1260 char cSep = (aValue.indexOf(',') != -1)?',':' ';
1261 // alpha value can be after a "/" or ","
1262 bool bIsSepAlphaDiv = (aValue.indexOf('/') != -1)?true:false;
1263 for ( int nCol = 0; nCol < 4 && nPos > 0; ++nCol )
1265 const std::u16string_view aNumber = o3tl::getToken(aValue, 0, cSep, nPos);
1267 sal_Int32 nNumber = o3tl::toInt32(aNumber);
1268 if( nNumber<0 )
1270 nNumber = 0;
1272 else if( aNumber.find('%') != std::u16string_view::npos )
1274 if( nNumber > 100 )
1275 nNumber = 100;
1276 nNumber *= 255;
1277 nNumber /= 100;
1279 else if( nNumber > 255 )
1280 nNumber = 255;
1281 else if( aNumber.find('.') != std::u16string_view::npos )
1283 // in this case aNumber contains something like "0.3" so not an sal_Int32
1284 nNumber = static_cast<sal_Int32>(255.0*o3tl::toDouble(aNumber));
1286 aColors[nCol] = static_cast<sal_uInt8>(nNumber);
1287 // rgb with alpha and '/' has this form: rgb(255 0 0 / 50%)
1288 if (bIsSepAlphaDiv && nCol == 2)
1290 // but there can be some spaces or not before and after the "/", so skip them
1291 while (aValue[nPos] == '/' || aValue[nPos] == ' ')
1292 ++nPos;
1296 rColor.SetRed( aColors[0] );
1297 rColor.SetGreen( aColors[1] );
1298 rColor.SetBlue( aColors[2] );
1299 rColor.SetAlpha( aColors[3] );
1301 bRet = true; // something different than a colour isn't possible
1303 break;
1305 case CSS1_IDENT:
1306 case CSS1_STRING:
1308 OUString aTmp( aValue.toAsciiUpperCase() );
1309 nColor = GetHTMLColor( aTmp );
1310 bRet = nColor != SAL_MAX_UINT32;
1312 if( bRet || CSS1_STRING != eType || aValue.isEmpty() ||
1313 aValue[0] != '#' )
1314 break;
1315 [[fallthrough]];
1316 case CSS1_HEXCOLOR:
1318 // MS-IE hack: colour can also be a string
1319 sal_Int32 nOffset = CSS1_STRING==eType ? 1 : 0;
1320 bool bDouble = aValue.getLength()-nOffset == 3;
1321 sal_Int32 i = nOffset, nEnd = (bDouble ? 3 : 6) + nOffset;
1323 nColor = 0;
1324 for( ; i<nEnd; i++ )
1326 sal_Unicode c = (i<aValue.getLength() ? aValue[i]
1327 : '0' );
1328 if( c >= '0' && c <= '9' )
1329 c -= 48;
1330 else if( c >= 'A' && c <= 'F' )
1331 c -= 55;
1332 else if( c >= 'a' && c <= 'f' )
1333 c -= 87;
1334 else
1335 c = 16;
1337 nColor *= 16;
1338 if( c<16 )
1339 nColor += c;
1340 if( bDouble )
1342 nColor *= 16;
1343 if( c<16 )
1344 nColor += c;
1347 bRet = true;
1349 break;
1350 default:
1354 if( bRet && nColor!=SAL_MAX_UINT32 )
1356 rColor.SetRed( static_cast<sal_uInt8>((nColor & 0x00ff0000UL) >> 16) );
1357 rColor.SetGreen( static_cast<sal_uInt8>((nColor & 0x0000ff00UL) >> 8) );
1358 rColor.SetBlue( static_cast<sal_uInt8>(nColor & 0x000000ffUL) );
1361 return bRet;
1364 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */