Version 7.6.3.2-android, tag libreoffice-7.6.3.2-android
[LibreOffice.git] / sw / source / filter / html / parcss1.cxx
blobf3145f1fa5bdb5612e12309b6e66f68b4005cf6a
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <o3tl/string_view.hxx>
21 #include <osl/diagnose.h>
22 #include <rtl/character.hxx>
23 #include <rtl/ustrbuf.hxx>
24 #include <tools/color.hxx>
25 #include <tools/solar.h>
26 #include <svtools/htmltokn.h>
27 #include <comphelper/string.hxx>
28 #include "parcss1.hxx"
30 // Loop-Check: Used to avoid infinite loops, is checked after every
31 // loop, if there is progress of the input position
32 #define LOOP_CHECK
34 #ifdef LOOP_CHECK
36 #define LOOP_CHECK_DECL \
37 sal_Int32 nOldInPos = SAL_MAX_INT32;
38 #define LOOP_CHECK_RESTART \
39 nOldInPos = SAL_MAX_INT32;
40 #define LOOP_CHECK_CHECK( where ) \
41 OSL_ENSURE( nOldInPos!=m_nInPos || m_cNextCh==sal_Unicode(EOF), where ); \
42 if( nOldInPos==m_nInPos && m_cNextCh!=sal_Unicode(EOF) ) \
43 break; \
44 else \
45 nOldInPos = m_nInPos;
47 #else
49 #define LOOP_CHECK_DECL
50 #define LOOP_CHECK_RESTART
51 #define LOOP_CHECK_CHECK( where )
53 #endif
55 const sal_Int32 MAX_LEN = 1024;
57 void CSS1Parser::InitRead( const OUString& rIn )
59 m_nlLineNr = 0;
60 m_nlLinePos = 0;
62 m_bWhiteSpace = true; // if nothing was read it's like there was WS
63 m_bEOF = false;
64 m_eState = CSS1_PAR_WORKING;
65 m_nValue = 0.;
67 m_aIn = rIn;
68 m_nInPos = 0;
69 m_cNextCh = GetNextChar();
70 m_nToken = GetNextToken();
73 sal_Unicode CSS1Parser::GetNextChar()
75 if( m_nInPos >= m_aIn.getLength() )
77 m_bEOF = true;
78 return sal_Unicode(EOF);
81 sal_Unicode c = m_aIn[m_nInPos];
82 m_nInPos++;
84 if( c == '\n' )
86 ++m_nlLineNr;
87 m_nlLinePos = 1;
89 else
90 ++m_nlLinePos;
92 return c;
95 // This function implements the scanner described in
97 // http://www.w3.org/pub/WWW/TR/WD-css1.html
98 // resp. http://www.w3.org/pub/WWW/TR/WD-css1-960220.html
100 // for CSS1. It's a direct implementation of the
101 // described Lex grammar.
103 CSS1Token CSS1Parser::GetNextToken()
105 CSS1Token nRet = CSS1_NULL;
106 m_aToken.clear();
108 do {
109 // remember if white space was read
110 bool bPrevWhiteSpace = m_bWhiteSpace;
111 m_bWhiteSpace = false;
113 bool bNextCh = true;
114 switch( m_cNextCh )
116 case '/': // COMMENT | '/'
118 m_cNextCh = GetNextChar();
119 if( '*' == m_cNextCh )
121 // COMMENT
122 m_cNextCh = GetNextChar();
124 bool bAsterisk = false;
125 while( !(bAsterisk && '/'==m_cNextCh) && !IsEOF() )
127 bAsterisk = ('*'==m_cNextCh);
128 m_cNextCh = GetNextChar();
131 else
133 // '/'
134 bNextCh = false;
135 nRet = CSS1_SLASH;
138 break;
140 case '@': // '@import' | '@XXX'
142 m_cNextCh = GetNextChar();
143 if (rtl::isAsciiAlpha(m_cNextCh))
145 // scan the next identifier
146 OUStringBuffer sTmpBuffer(32);
147 do {
148 sTmpBuffer.append( m_cNextCh );
149 m_cNextCh = GetNextChar();
150 } while( (rtl::isAsciiAlphanumeric(m_cNextCh) ||
151 '-' == m_cNextCh) && !IsEOF() );
153 m_aToken += sTmpBuffer;
155 // check if we know it
156 switch( m_aToken[0] )
158 case 'i':
159 case 'I':
160 if( m_aToken.equalsIgnoreAsciiCase( "import" ) )
161 nRet = CSS1_IMPORT_SYM;
162 break;
163 case 'p':
164 case 'P':
165 if( m_aToken.equalsIgnoreAsciiCase( "page" ) )
166 nRet = CSS1_PAGE_SYM;
167 break;
170 // error handling: ignore '@indent' and the rest until
171 // semicolon at end of the next block
172 if( CSS1_NULL==nRet )
174 m_aToken.clear();
175 int nBlockLvl = 0;
176 sal_Unicode cQuoteCh = 0;
177 bool bDone = false, bEscape = false;
178 while( !bDone && !IsEOF() )
180 bool bOldEscape = bEscape;
181 bEscape = false;
182 switch( m_cNextCh )
184 case '{':
185 if( !cQuoteCh && !bOldEscape )
186 nBlockLvl++;
187 break;
188 case ';':
189 if( !cQuoteCh && !bOldEscape )
190 bDone = nBlockLvl==0;
191 break;
192 case '}':
193 if( !cQuoteCh && !bOldEscape )
194 bDone = --nBlockLvl==0;
195 break;
196 case '\"':
197 case '\'':
198 if( !bOldEscape )
200 if( cQuoteCh )
202 if( cQuoteCh == m_cNextCh )
203 cQuoteCh = 0;
205 else
207 cQuoteCh = m_cNextCh;
210 break;
211 case '\\':
212 if( !bOldEscape )
213 bEscape = true;
214 break;
216 m_cNextCh = GetNextChar();
220 bNextCh = false;
223 break;
225 case '!': // '!' 'legal' | '!' 'important' | syntax error
227 // ignore white space
228 m_cNextCh = GetNextChar();
229 while( ( ' ' == m_cNextCh ||
230 (m_cNextCh >= 0x09 && m_cNextCh <= 0x0d) ) && !IsEOF() )
232 m_bWhiteSpace = true;
233 m_cNextCh = GetNextChar();
236 if( 'i'==m_cNextCh || 'I'==m_cNextCh)
238 // scan next identifier
239 OUStringBuffer sTmpBuffer(32);
240 do {
241 sTmpBuffer.append( m_cNextCh );
242 m_cNextCh = GetNextChar();
243 } while( (rtl::isAsciiAlphanumeric(m_cNextCh) ||
244 '-' == m_cNextCh) && !IsEOF() );
246 m_aToken += sTmpBuffer;
248 if( ( 'i'==m_aToken[0] || 'I'==m_aToken[0] ) &&
249 m_aToken.equalsIgnoreAsciiCase( "important" ) )
251 // '!' 'important'
252 nRet = CSS1_IMPORTANT_SYM;
254 else
256 // error handling: ignore '!', not IDENT
257 nRet = CSS1_IDENT;
260 m_bWhiteSpace = false;
261 bNextCh = false;
263 else
265 // error handling: ignore '!'
266 bNextCh = false;
269 break;
271 case '\"':
272 case '\'': // STRING
274 // \... isn't possible yet!!!
275 sal_Unicode cQuoteChar = m_cNextCh;
276 m_cNextCh = GetNextChar();
278 OUStringBuffer sTmpBuffer( MAX_LEN );
279 do {
280 sTmpBuffer.append( m_cNextCh );
281 m_cNextCh = GetNextChar();
282 } while( cQuoteChar != m_cNextCh && !IsEOF() );
284 m_aToken += sTmpBuffer;
286 nRet = CSS1_STRING;
288 break;
290 case '0':
291 case '1':
292 case '2':
293 case '3':
294 case '4':
295 case '5':
296 case '6':
297 case '7':
298 case '8':
299 case '9': // NUMBER | PERCENTAGE | LENGTH
301 // save current position
302 std::size_t nInPosSave = m_nInPos;
303 sal_Unicode cNextChSave = m_cNextCh;
304 sal_uInt32 nlLineNrSave = m_nlLineNr;
305 sal_uInt32 nlLinePosSave = m_nlLinePos;
306 bool bEOFSave = m_bEOF;
308 // first try to parse a hex digit
309 OUStringBuffer sTmpBuffer( 16 );
310 do {
311 sTmpBuffer.append( m_cNextCh );
312 m_cNextCh = GetNextChar();
313 } while( sTmpBuffer.getLength() < 7 &&
314 ( ('0'<=m_cNextCh && '9'>=m_cNextCh) ||
315 ('A'<=m_cNextCh && 'F'>=m_cNextCh) ||
316 ('a'<=m_cNextCh && 'f'>=m_cNextCh) ) &&
317 !IsEOF() );
319 if( sTmpBuffer.getLength()==6 )
321 // we found a color in hex
322 m_aToken += sTmpBuffer;
323 nRet = CSS1_HEXCOLOR;
324 bNextCh = false;
326 break;
329 // otherwise we try a number
330 m_nInPos = nInPosSave;
331 m_cNextCh = cNextChSave;
332 m_nlLineNr = nlLineNrSave;
333 m_nlLinePos = nlLinePosSave;
334 m_bEOF = bEOFSave;
336 // first parse the number
337 sTmpBuffer.setLength( 0 );
338 do {
339 sTmpBuffer.append( m_cNextCh );
340 m_cNextCh = GetNextChar();
341 } while( (('0'<=m_cNextCh && '9'>=m_cNextCh) || '.'==m_cNextCh) &&
342 !IsEOF() );
344 m_aToken += sTmpBuffer;
345 m_nValue = m_aToken.toDouble();
347 // ignore white space
348 while( ( ' ' == m_cNextCh ||
349 (m_cNextCh >= 0x09 && m_cNextCh <= 0x0d) ) && !IsEOF() )
351 m_bWhiteSpace = true;
352 m_cNextCh = GetNextChar();
355 // check now, of there is a unit
356 switch( m_cNextCh )
358 case '%': // PERCENTAGE
359 m_bWhiteSpace = false;
360 nRet = CSS1_PERCENTAGE;
361 break;
363 case 'c':
364 case 'C': // LENGTH cm | LENGTH IDENT
365 case 'e':
366 case 'E': // LENGTH (em | ex) | LENGTH IDENT
367 case 'i':
368 case 'I': // LENGTH inch | LENGTH IDENT
369 case 'p':
370 case 'P': // LENGTH (pt | px | pc) | LENGTH IDENT
371 case 'm':
372 case 'M': // LENGTH mm | LENGTH IDENT
374 // save current position
375 sal_Int32 nInPosOld = m_nInPos;
376 sal_Unicode cNextChOld = m_cNextCh;
377 sal_uInt32 nlLineNrOld = m_nlLineNr;
378 sal_uInt32 nlLinePosOld = m_nlLinePos;
379 bool bEOFOld = m_bEOF;
381 // parse the next identifier
382 OUString aIdent;
383 OUStringBuffer sTmpBuffer2(64);
384 do {
385 sTmpBuffer2.append( m_cNextCh );
386 m_cNextCh = GetNextChar();
387 } while( (rtl::isAsciiAlphanumeric(m_cNextCh) ||
388 '-' == m_cNextCh) && !IsEOF() );
390 aIdent += sTmpBuffer2;
392 // Is it a unit?
393 const char *pCmp1 = nullptr, *pCmp2 = nullptr, *pCmp3 = nullptr;
394 double nScale1 = 1., nScale2 = 1.;
395 CSS1Token nToken1 = CSS1_LENGTH,
396 nToken2 = CSS1_LENGTH,
397 nToken3 = CSS1_LENGTH;
398 switch( aIdent[0] )
400 case 'c':
401 case 'C':
402 pCmp1 = "cm";
403 nScale1 = (72.*20.)/2.54; // twip
404 break;
405 case 'e':
406 case 'E':
407 pCmp1 = "em";
408 nToken1 = CSS1_EMS;
410 pCmp2 = "ex";
411 nToken2 = CSS1_EMX;
412 break;
413 case 'i':
414 case 'I':
415 pCmp1 = "in";
416 nScale1 = 72.*20.; // twip
417 break;
418 case 'm':
419 case 'M':
420 pCmp1 = "mm";
421 nScale1 = (72.*20.)/25.4; // twip
422 break;
423 case 'p':
424 case 'P':
425 pCmp1 = "pt";
426 nScale1 = 20.; // twip
428 pCmp2 = "pc";
429 nScale2 = 12.*20.; // twip
431 pCmp3 = "px";
432 nToken3 = CSS1_PIXLENGTH;
433 break;
436 double nScale = 0.0;
437 OSL_ENSURE( pCmp1, "Where does the first digit come from?" );
438 if( aIdent.equalsIgnoreAsciiCaseAscii( pCmp1 ) )
440 nScale = nScale1;
441 nRet = nToken1;
443 else if( pCmp2 &&
444 aIdent.equalsIgnoreAsciiCaseAscii( pCmp2 ) )
446 nScale = nScale2;
447 nRet = nToken2;
449 else if( pCmp3 &&
450 aIdent.equalsIgnoreAsciiCaseAscii( pCmp3 ) )
452 nScale = 1.; // nScale3
453 nRet = nToken3;
455 else
457 nRet = CSS1_NUMBER;
460 if( CSS1_LENGTH==nRet && nScale!=1.0 )
461 m_nValue *= nScale;
463 if( nRet == CSS1_NUMBER )
465 m_nInPos = nInPosOld;
466 m_cNextCh = cNextChOld;
467 m_nlLineNr = nlLineNrOld;
468 m_nlLinePos = nlLinePosOld;
469 m_bEOF = bEOFOld;
471 else
473 m_bWhiteSpace = false;
475 bNextCh = false;
477 break;
478 default: // NUMBER IDENT
479 bNextCh = false;
480 nRet = CSS1_NUMBER;
481 break;
484 break;
486 case ':': // ':'
487 // catch link/visited/active !!!
488 nRet = CSS1_COLON;
489 break;
491 case '.': // DOT_W_WS | DOT_WO_WS
492 nRet = bPrevWhiteSpace ? CSS1_DOT_W_WS : CSS1_DOT_WO_WS;
493 break;
495 case '+': // '+'
496 nRet = CSS1_PLUS;
497 break;
499 case '-': // '-'
500 nRet = CSS1_MINUS;
501 break;
503 case '{': // '{'
504 nRet = CSS1_OBRACE;
505 break;
507 case '}': // '}'
508 nRet = CSS1_CBRACE;
509 break;
511 case ';': // ';'
512 nRet = CSS1_SEMICOLON;
513 break;
515 case ',': // ','
516 nRet = CSS1_COMMA;
517 break;
519 case '#': // '#'
520 m_cNextCh = GetNextChar();
521 if( ('0'<=m_cNextCh && '9'>=m_cNextCh) ||
522 ('a'<=m_cNextCh && 'f'>=m_cNextCh) ||
523 ('A'<=m_cNextCh && 'F'>=m_cNextCh) )
525 // save current position
526 sal_Int32 nInPosSave = m_nInPos;
527 sal_Unicode cNextChSave = m_cNextCh;
528 sal_uInt32 nlLineNrSave = m_nlLineNr;
529 sal_uInt32 nlLinePosSave = m_nlLinePos;
530 bool bEOFSave = m_bEOF;
532 // first try to parse a hex digit
533 OUStringBuffer sTmpBuffer(8);
534 do {
535 sTmpBuffer.append( m_cNextCh );
536 m_cNextCh = GetNextChar();
537 } while( sTmpBuffer.getLength() < 9 &&
538 ( ('0'<=m_cNextCh && '9'>=m_cNextCh) ||
539 ('A'<=m_cNextCh && 'F'>=m_cNextCh) ||
540 ('a'<=m_cNextCh && 'f'>=m_cNextCh) ) &&
541 !IsEOF() );
543 if( sTmpBuffer.getLength()==6 || sTmpBuffer.getLength()==3 )
545 // we found a color in hex (RGB)
546 m_aToken += sTmpBuffer;
547 nRet = CSS1_HEXCOLOR;
548 bNextCh = false;
550 break;
553 if( sTmpBuffer.getLength()==8 )
555 // we found a color in hex (RGBA)
556 // we convert it to RGB assuming white background
557 sal_uInt32 nColor = sTmpBuffer.makeStringAndClear().toUInt32(16);
558 sal_uInt32 nRed = (nColor & 0xff000000) >> 24;
559 sal_uInt32 nGreen = (nColor & 0xff0000) >> 16;
560 sal_uInt32 nBlue = (nColor & 0xff00) >> 8;
561 double nAlpha = (nColor & 0xff) / 255.0;
562 nRed = (1 - nAlpha) * 255 + nAlpha * nRed;
563 nGreen = (1 - nAlpha) * 255 + nAlpha * nGreen;
564 nBlue = (1 - nAlpha) * 255 + nAlpha * nBlue;
565 nColor = (nRed << 16) + (nGreen << 8) + nBlue;
566 m_aToken += OUString::number(nColor, 16);
567 nRet = CSS1_HEXCOLOR;
568 bNextCh = false;
570 break;
573 // otherwise we try a number
574 m_nInPos = nInPosSave;
575 m_cNextCh = cNextChSave;
576 m_nlLineNr = nlLineNrSave;
577 m_nlLinePos = nlLinePosSave;
578 m_bEOF = bEOFSave;
581 nRet = CSS1_HASH;
582 bNextCh = false;
583 break;
585 case ' ':
586 case '\t':
587 case '\r':
588 case '\n': // White-Space
589 m_bWhiteSpace = true;
590 break;
592 case sal_Unicode(EOF):
593 if( IsEOF() )
595 m_eState = CSS1_PAR_ACCEPTED;
596 bNextCh = false;
597 break;
599 [[fallthrough]];
601 default: // IDENT | syntax error
602 if (rtl::isAsciiAlpha(m_cNextCh))
604 // IDENT
606 bool bHexColor = true;
608 // parse the next identifier
609 OUStringBuffer sTmpBuffer(64);
610 do {
611 sTmpBuffer.append( m_cNextCh );
612 if( bHexColor )
614 bHexColor = sTmpBuffer.getLength()<7 &&
615 ( ('0'<=m_cNextCh && '9'>=m_cNextCh) ||
616 ('A'<=m_cNextCh && 'F'>=m_cNextCh) ||
617 ('a'<=m_cNextCh && 'f'>=m_cNextCh) );
619 m_cNextCh = GetNextChar();
620 } while( (rtl::isAsciiAlphanumeric(m_cNextCh) ||
621 '-' == m_cNextCh) && !IsEOF() );
623 m_aToken += sTmpBuffer;
625 if( bHexColor && sTmpBuffer.getLength()==6 )
627 bNextCh = false;
628 nRet = CSS1_HEXCOLOR;
630 break;
632 if( '('==m_cNextCh &&
633 ( (('u'==m_aToken[0] || 'U'==m_aToken[0]) &&
634 m_aToken.equalsIgnoreAsciiCase( "url" )) ||
635 (('r'==m_aToken[0] || 'R'==m_aToken[0]) &&
636 (m_aToken.equalsIgnoreAsciiCase( "rgb" ) || m_aToken.equalsIgnoreAsciiCase( "rgba" ) )
637 ) ) )
639 int nNestCnt = 0;
640 OUStringBuffer sTmpBuffer2(64);
641 do {
642 sTmpBuffer2.append( m_cNextCh );
643 switch( m_cNextCh )
645 case '(': nNestCnt++; break;
646 case ')': nNestCnt--; break;
648 m_cNextCh = GetNextChar();
649 } while( (nNestCnt>1 || ')'!=m_cNextCh) && !IsEOF() );
650 sTmpBuffer2.append( m_cNextCh );
651 m_aToken += sTmpBuffer2;
652 bNextCh = true;
653 nRet = 'u'==m_aToken[0] || 'U'==m_aToken[0]
654 ? CSS1_URL
655 : CSS1_RGB;
657 else
659 bNextCh = false;
660 nRet = CSS1_IDENT;
663 // error handling: ignore digit
664 break;
666 if( bNextCh )
667 m_cNextCh = GetNextChar();
669 } while( CSS1_NULL==nRet && IsParserWorking() );
671 return nRet;
674 // These functions implement the parser described in
676 // http://www.w3.org/pub/WWW/TR/WD-css1.html
677 // resp. http://www.w3.org/pub/WWW/TR/WD-css1-960220.html
679 // for CSS1. It's a direct implementation of the
680 // described Lex grammar.
682 // stylesheet
683 // : import* rule*
685 // import
686 // : IMPORT_SYM url
688 // url
689 // : STRING
691 void CSS1Parser::ParseStyleSheet()
693 LOOP_CHECK_DECL
695 // import*
696 bool bDone = false;
697 while( !bDone && IsParserWorking() )
699 LOOP_CHECK_CHECK( "Infinite loop in ParseStyleSheet()/import *" )
701 switch( m_nToken )
703 case CSS1_IMPORT_SYM:
704 // IMPORT_SYM url
705 // URL are skipped without checks
706 m_nToken = GetNextToken();
707 break;
708 case CSS1_IDENT: // Look-Aheads
709 case CSS1_DOT_W_WS:
710 case CSS1_HASH:
711 case CSS1_PAGE_SYM:
712 // rule
713 bDone = true;
714 break;
715 default:
716 // error handling: ignore
717 break;
720 if( !bDone )
721 m_nToken = GetNextToken();
724 LOOP_CHECK_RESTART
726 // rule *
727 while( IsParserWorking() )
729 LOOP_CHECK_CHECK( "Infinite loop in ParseStyleSheet()/rule *" )
731 switch( m_nToken )
733 case CSS1_IDENT: // Look-Aheads
734 case CSS1_DOT_W_WS:
735 case CSS1_HASH:
736 case CSS1_PAGE_SYM:
737 // rule
738 ParseRule();
739 break;
740 default:
741 // error handling: ignore
742 m_nToken = GetNextToken();
743 break;
748 // rule
749 // : selector [ ',' selector ]*
750 // '{' declaration [ ';' declaration ]* '}'
752 void CSS1Parser::ParseRule()
754 // selector
755 std::unique_ptr<CSS1Selector> pSelector = ParseSelector();
756 if( !pSelector )
757 return;
759 // process selector
760 SelectorParsed( std::move(pSelector), true );
762 LOOP_CHECK_DECL
764 // [ ',' selector ]*
765 while( CSS1_COMMA==m_nToken && IsParserWorking() )
767 LOOP_CHECK_CHECK( "Infinite loop in ParseRule()/selector *" )
769 // ignore ','
770 m_nToken = GetNextToken();
772 // selector
773 pSelector = ParseSelector();
774 if( !pSelector )
775 return;
777 // process selector
778 SelectorParsed( std::move(pSelector), false );
781 // '{'
782 if( CSS1_OBRACE != m_nToken )
783 return;
784 m_nToken = GetNextToken();
786 // declaration
787 OUString aProperty;
788 std::unique_ptr<CSS1Expression> pExpr = ParseDeclaration( aProperty );
789 if( !pExpr )
790 return;
792 // process expression
793 DeclarationParsed( aProperty, std::move(pExpr) );
795 LOOP_CHECK_RESTART
797 // [ ';' declaration ]*
798 while( CSS1_SEMICOLON==m_nToken && IsParserWorking() )
800 LOOP_CHECK_CHECK( "Infinite loop in ParseRule()/declaration *" )
802 // ';'
803 m_nToken = GetNextToken();
805 // declaration
806 if( CSS1_IDENT == m_nToken )
808 std::unique_ptr<CSS1Expression> pExp = ParseDeclaration( aProperty );
809 if( pExp )
811 // process expression
812 DeclarationParsed( aProperty, std::move(pExp));
817 // '}'
818 if( CSS1_CBRACE == m_nToken )
819 m_nToken = GetNextToken();
822 // selector
823 // : simple_selector+ [ ':' pseudo_element ]?
825 // simple_selector
826 // : element_name [ DOT_WO_WS class ]?
827 // | DOT_W_WS class
828 // | id_selector
830 // element_name
831 // : IDENT
833 // class
834 // : IDENT
836 // id_selector
837 // : '#' IDENT
839 // pseudo_element
840 // : IDENT
842 std::unique_ptr<CSS1Selector> CSS1Parser::ParseSelector()
844 std::unique_ptr<CSS1Selector> pRoot;
845 CSS1Selector *pLast = nullptr;
847 bool bDone = false;
848 CSS1Selector *pNew = nullptr;
850 LOOP_CHECK_DECL
852 // simple_selector+
853 while( !bDone && IsParserWorking() )
855 LOOP_CHECK_CHECK( "Infinite loop in ParseSelector()" )
857 bool bNextToken = true;
859 switch( m_nToken )
861 case CSS1_IDENT:
863 // element_name [ DOT_WO_WS class ]?
865 // element_name
866 OUString aElement = m_aToken;
867 CSS1SelectorType eType = CSS1_SELTYPE_ELEMENT;
868 m_nToken = GetNextToken();
870 if( CSS1_DOT_WO_WS == m_nToken )
872 // DOT_WO_WS
873 m_nToken = GetNextToken();
875 // class
876 if( CSS1_IDENT == m_nToken )
878 aElement += "." + m_aToken;
879 eType = CSS1_SELTYPE_ELEM_CLASS;
881 else
883 // missing class
884 return pRoot;
887 else
889 // that was a look-ahead
890 bNextToken = false;
892 pNew = new CSS1Selector( eType, aElement );
894 break;
895 case CSS1_DOT_W_WS:
896 // DOT_W_WS class
898 // DOT_W_WS
899 m_nToken = GetNextToken();
901 if( CSS1_IDENT==m_nToken )
903 // class
904 pNew = new CSS1Selector( CSS1_SELTYPE_CLASS, m_aToken );
906 else
908 // missing class
909 return pRoot;
911 break;
912 case CSS1_HASH:
913 // '#' id_selector
915 // '#'
916 m_nToken = GetNextToken();
918 if( CSS1_IDENT==m_nToken )
920 // id_selector
921 pNew = new CSS1Selector( CSS1_SELTYPE_ID, m_aToken );
923 else
925 // missing id_selector
926 return pRoot;
928 break;
930 case CSS1_PAGE_SYM:
932 // @page
933 pNew = new CSS1Selector( CSS1_SELTYPE_PAGE, m_aToken );
935 break;
937 default:
938 // stop because we don't know what's next
939 bDone = true;
940 break;
943 // if created a new selector then save it
944 if( pNew )
946 OSL_ENSURE( (pRoot!=nullptr) == (pLast!=nullptr),
947 "Root-Selector, but no Last" );
948 if( pLast )
949 pLast->SetNext( pNew );
950 else
951 pRoot.reset(pNew);
953 pLast = pNew;
954 pNew = nullptr;
957 if( bNextToken && !bDone )
958 m_nToken = GetNextToken();
961 if( !pRoot )
963 // missing simple_selector
964 return pRoot;
967 // [ ':' pseudo_element ]?
968 if( CSS1_COLON==m_nToken && IsParserWorking() )
970 // ':' pseudo element
971 m_nToken = GetNextToken();
972 if( CSS1_IDENT==m_nToken )
974 if (pLast)
975 pLast->SetNext( new CSS1Selector(CSS1_SELTYPE_PSEUDO,m_aToken) );
976 m_nToken = GetNextToken();
978 else
980 // missing pseudo_element
981 return pRoot;
985 return pRoot;
988 // declaration
989 // : property ':' expr prio?
990 // | /* empty */
992 // expression
993 // : term [ operator term ]*
995 // term
996 // : unary_operator?
997 // [ NUMBER | STRING | PERCENTAGE | LENGTH | EMS | EXS | IDENT |
998 // HEXCOLOR | URL | RGB ]
1000 // operator
1001 // : '/' | ',' | /* empty */
1003 // unary_operator
1004 // : '-' | '+'
1006 // property
1007 // : ident
1009 // the sign is only used for numeric values (except PERCENTAGE)
1010 // and it's applied on nValue!
1011 std::unique_ptr<CSS1Expression> CSS1Parser::ParseDeclaration( OUString& rProperty )
1013 std::unique_ptr<CSS1Expression> pRoot;
1014 CSS1Expression *pLast = nullptr;
1016 // property
1017 if( CSS1_IDENT != m_nToken )
1019 // missing property
1020 return pRoot;
1022 rProperty = m_aToken;
1024 m_nToken = GetNextToken();
1026 // ':'
1027 if( CSS1_COLON != m_nToken )
1029 // missing ':'
1030 return pRoot;
1032 m_nToken = GetNextToken();
1034 // term [operator term]*
1035 // here we're pretty lax regarding the syntax, but this shouldn't
1036 // be a problem
1037 bool bDone = false;
1038 sal_Unicode cSign = 0, cOp = 0;
1039 CSS1Expression *pNew = nullptr;
1041 LOOP_CHECK_DECL
1043 while( !bDone && IsParserWorking() )
1045 LOOP_CHECK_CHECK( "Infinite loop in ParseDeclaration()" )
1047 switch( m_nToken )
1049 case CSS1_MINUS:
1050 cSign = '-';
1051 break;
1053 case CSS1_PLUS:
1054 cSign = '+';
1055 break;
1057 case CSS1_NUMBER:
1058 case CSS1_LENGTH:
1059 case CSS1_PIXLENGTH:
1060 case CSS1_EMS:
1061 case CSS1_EMX:
1062 if( '-'==cSign )
1063 m_nValue = -m_nValue;
1064 [[fallthrough]];
1065 case CSS1_STRING:
1066 case CSS1_PERCENTAGE:
1067 case CSS1_IDENT:
1068 case CSS1_URL:
1069 case CSS1_RGB:
1070 case CSS1_HEXCOLOR:
1071 pNew = new CSS1Expression( m_nToken, m_aToken, m_nValue, cOp );
1072 m_nValue = 0; // otherwise this also is applied to next ident
1073 cSign = 0;
1074 cOp = 0;
1075 break;
1077 case CSS1_SLASH:
1078 cOp = '/';
1079 cSign = 0;
1080 break;
1082 case CSS1_COMMA:
1083 cOp = ',';
1084 cSign = 0;
1085 break;
1087 default:
1088 bDone = true;
1089 break;
1092 // if created a new expression save it
1093 if( pNew )
1095 OSL_ENSURE( (pRoot!=nullptr) == (pLast!=nullptr),
1096 "Root-Selector, but no Last" );
1097 if( pLast )
1098 pLast->SetNext( pNew );
1099 else
1100 pRoot.reset(pNew);
1102 pLast = pNew;
1103 pNew = nullptr;
1106 if( !bDone )
1107 m_nToken = GetNextToken();
1110 if( !pRoot )
1112 // missing term
1113 return pRoot;
1116 // prio?
1117 if( CSS1_IMPORTANT_SYM==m_nToken )
1119 // IMPORTANT_SYM
1120 m_nToken = GetNextToken();
1123 return pRoot;
1126 CSS1Parser::CSS1Parser()
1127 : m_bWhiteSpace(false)
1128 , m_bEOF(false)
1129 , m_cNextCh(0)
1130 , m_nInPos(0)
1131 , m_nlLineNr(0)
1132 , m_nlLinePos(0)
1133 , m_nValue(0)
1134 , m_eState(CSS1_PAR_ACCEPTED)
1135 , m_nToken(CSS1_NULL)
1139 CSS1Parser::~CSS1Parser()
1143 void CSS1Parser::ParseStyleSheet( const OUString& rIn )
1145 OUString aTmp( rIn );
1147 sal_Unicode c;
1148 while( !aTmp.isEmpty() &&
1149 ( ' '==(c=aTmp[0]) || '\t'==c || '\r'==c || '\n'==c ) )
1150 aTmp = aTmp.copy( 1 );
1152 while( !aTmp.isEmpty() && ( ' '==(c=aTmp[aTmp.getLength()-1])
1153 || '\t'==c || '\r'==c || '\n'==c ) )
1154 aTmp = aTmp.copy( 0, aTmp.getLength()-1 );
1156 // remove SGML comments
1157 if( aTmp.getLength() >= 4 &&
1158 aTmp.startsWith( "<!--" ) )
1159 aTmp = aTmp.copy( 4 );
1161 if( aTmp.getLength() >=3 &&
1162 aTmp.endsWith("-->") )
1163 aTmp = aTmp.copy( 0, aTmp.getLength() - 3 );
1165 if( aTmp.isEmpty() )
1166 return;
1168 InitRead( aTmp );
1170 ParseStyleSheet();
1173 void CSS1Parser::ParseStyleOption( const OUString& rIn )
1175 if( rIn.isEmpty() )
1176 return;
1178 InitRead( rIn );
1180 // fdo#41796: skip over spurious semicolons
1181 while (CSS1_SEMICOLON == m_nToken)
1183 m_nToken = GetNextToken();
1186 OUString aProperty;
1187 std::unique_ptr<CSS1Expression> pExpr = ParseDeclaration( aProperty );
1188 if( !pExpr )
1189 return;
1191 // process expression
1192 DeclarationParsed( aProperty, std::move(pExpr) );
1194 LOOP_CHECK_DECL
1196 // [ ';' declaration ]*
1197 while( CSS1_SEMICOLON==m_nToken && IsParserWorking() )
1199 LOOP_CHECK_CHECK( "Infinite loop in ParseStyleOption()" )
1201 m_nToken = GetNextToken();
1202 if( CSS1_IDENT==m_nToken )
1204 std::unique_ptr<CSS1Expression> pExp = ParseDeclaration( aProperty );
1205 if( pExp )
1207 // process expression
1208 DeclarationParsed( aProperty, std::move(pExp) );
1214 void CSS1Parser::SelectorParsed( std::unique_ptr<CSS1Selector> /* pSelector */, bool /*bFirst*/ )
1218 void CSS1Parser::DeclarationParsed( const OUString& /*rProperty*/,
1219 std::unique_ptr<CSS1Expression> /* pExpr */ )
1223 CSS1Selector::~CSS1Selector()
1225 delete m_pNext;
1228 CSS1Expression::~CSS1Expression()
1230 delete pNext;
1233 void CSS1Expression::GetURL( OUString& rURL ) const
1235 OSL_ENSURE( CSS1_URL==eType, "CSS1-Expression is not URL" );
1237 OSL_ENSURE( aValue.startsWithIgnoreAsciiCase( "url" ) &&
1238 aValue.getLength() > 5 &&
1239 '(' == aValue[3] &&
1240 ')' == aValue[aValue.getLength()-1],
1241 "no valid URL(...)" );
1243 if( aValue.getLength() <= 5 )
1244 return;
1246 rURL = aValue.copy( 4, aValue.getLength() - 5 );
1248 // tdf#94088 original stripped only spaces, but there may also be
1249 // double quotes in CSS style URLs, so be prepared to spaces followed
1250 // by a single quote followed by spaces
1251 const sal_Unicode aSpace(' ');
1252 const sal_Unicode aSingleQuote('\'');
1254 rURL = comphelper::string::strip(rURL, aSpace);
1255 rURL = comphelper::string::strip(rURL, aSingleQuote);
1256 rURL = comphelper::string::strip(rURL, aSpace);
1259 bool CSS1Expression::GetColor( Color &rColor ) const
1261 OSL_ENSURE( CSS1_IDENT==eType || CSS1_RGB==eType ||
1262 CSS1_HEXCOLOR==eType || CSS1_STRING==eType,
1263 "CSS1-Expression cannot be colour" );
1265 bool bRet = false;
1266 sal_uInt32 nColor = SAL_MAX_UINT32;
1268 switch( eType )
1270 case CSS1_RGB:
1272 // fourth value to 255 means no alpha transparency
1273 // so the right by default value
1274 sal_uInt8 aColors[4] = { 0, 0, 0, 255 };
1276 // it can be "rgb" or "rgba"
1277 if (!aValue.startsWithIgnoreAsciiCase( "rgb" ) || aValue.getLength() < 6 ||
1278 (aValue[3] != '(' && aValue[4] != '(' ) || aValue[aValue.getLength()-1] != ')')
1280 break;
1283 sal_Int32 nPos = aValue.startsWithIgnoreAsciiCase( "rgba" )?5:4; // start after "rgba(" or "rgb("
1284 char cSep = (aValue.indexOf(',') != -1)?',':' ';
1285 // alpha value can be after a "/" or ","
1286 bool bIsSepAlphaDiv = (aValue.indexOf('/') != -1)?true:false;
1287 for ( int nCol = 0; nCol < 4 && nPos > 0; ++nCol )
1289 const std::u16string_view aNumber = o3tl::getToken(aValue, 0, cSep, nPos);
1291 sal_Int32 nNumber = o3tl::toInt32(aNumber);
1292 if( nNumber<0 )
1294 nNumber = 0;
1296 else if( aNumber.find('%') != std::u16string_view::npos )
1298 if( nNumber > 100 )
1299 nNumber = 100;
1300 nNumber *= 255;
1301 nNumber /= 100;
1303 else if( nNumber > 255 )
1304 nNumber = 255;
1305 else if( aNumber.find('.') != std::u16string_view::npos )
1307 // in this case aNumber contains something like "0.3" so not an sal_Int32
1308 nNumber = static_cast<sal_Int32>(255.0*o3tl::toDouble(aNumber));
1310 aColors[nCol] = static_cast<sal_uInt8>(nNumber);
1311 // rgb with alpha and '/' has this form: rgb(255 0 0 / 50%)
1312 if (bIsSepAlphaDiv && nCol == 2)
1314 // but there can be some spaces or not before and after the "/", so skip them
1315 while (aValue[nPos] == '/' || aValue[nPos] == ' ')
1316 ++nPos;
1320 rColor.SetRed( aColors[0] );
1321 rColor.SetGreen( aColors[1] );
1322 rColor.SetBlue( aColors[2] );
1323 rColor.SetAlpha( aColors[3] );
1325 bRet = true; // something different than a colour isn't possible
1327 break;
1329 case CSS1_IDENT:
1330 case CSS1_STRING:
1332 OUString aTmp( aValue.toAsciiUpperCase() );
1333 nColor = GetHTMLColor( aTmp );
1334 bRet = nColor != SAL_MAX_UINT32;
1336 if( bRet || CSS1_STRING != eType || aValue.isEmpty() ||
1337 aValue[0] != '#' )
1338 break;
1339 [[fallthrough]];
1340 case CSS1_HEXCOLOR:
1342 // MS-IE hack: colour can also be a string
1343 sal_Int32 nOffset = CSS1_STRING==eType ? 1 : 0;
1344 bool bDouble = aValue.getLength()-nOffset == 3;
1345 sal_Int32 i = nOffset, nEnd = (bDouble ? 3 : 6) + nOffset;
1347 nColor = 0;
1348 for( ; i<nEnd; i++ )
1350 sal_Unicode c = (i<aValue.getLength() ? aValue[i]
1351 : '0' );
1352 if( c >= '0' && c <= '9' )
1353 c -= 48;
1354 else if( c >= 'A' && c <= 'F' )
1355 c -= 55;
1356 else if( c >= 'a' && c <= 'f' )
1357 c -= 87;
1358 else
1359 c = 16;
1361 nColor *= 16;
1362 if( c<16 )
1363 nColor += c;
1364 if( bDouble )
1366 nColor *= 16;
1367 if( c<16 )
1368 nColor += c;
1371 bRet = true;
1373 break;
1374 default:
1378 if( bRet && nColor!=SAL_MAX_UINT32 )
1380 rColor.SetRed( static_cast<sal_uInt8>((nColor & 0x00ff0000UL) >> 16) );
1381 rColor.SetGreen( static_cast<sal_uInt8>((nColor & 0x0000ff00UL) >> 8) );
1382 rColor.SetBlue( static_cast<sal_uInt8>(nColor & 0x000000ffUL) );
1385 return bRet;
1388 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */