1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: syntaxhighlight.cxx,v $
10 * $Revision: 1.1.2.4 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_svtools.hxx"
34 #include <svtools/syntaxhighlight.hxx>
36 #include <unotools/charclass.hxx>
37 #include <tools/debug.hxx>
// NOTE(review): SV_IMPL_VARARR is a project macro defined outside this file; presumably it
// emits the implementation of the HighlightPortions container of HighlightPortion
// elements (used below via Insert()/Count()) — confirm against the macro definition.
40 SV_IMPL_VARARR(HighlightPortions
, HighlightPortion
)
43 // ##########################################################################
44 // ATTENTION: all of these words must be written in lowercase
45 // ##########################################################################
46 static const char* strListBasicKeyWords
[] = {
174 static const char* strListSqlKeyWords
[] = {
239 extern "C" int CDECL
compare_strings( const void *arg1
, const void *arg2
)
241 return strcmp( (char *)arg1
, *(char **)arg2
);
// Lookup table indexed by character code 0..255; isLetter() reads it for c < 256.
247 bool IsLetterTab
[256];
252 inline bool isLetter( sal_Unicode c
)
254 bool bRet
= (c
< 256) ? IsLetterTab
[c
] : isLetterUnicode( c
);
// Fallback used by isLetter() for character codes of 256 and above.
257 bool isLetterUnicode( sal_Unicode c
);
260 class BasicSimpleCharClass
262 static LetterTable aLetterTable
;
265 static BOOL
isAlpha( sal_Unicode c
, bool bCompatible
)
267 BOOL bRet
= (c
>= 'a' && c
<= 'z') || (c
>= 'A' && c
<= 'Z')
268 || (bCompatible
&& aLetterTable
.isLetter( c
));
272 static BOOL
isDigit( sal_Unicode c
)
274 BOOL bRet
= (c
>= '0' && c
<= '9');
278 static BOOL
isAlphaNumeric( sal_Unicode c
, bool bCompatible
)
280 BOOL bRet
= isDigit( c
) || isAlpha( c
, bCompatible
);
// Out-of-class definition of the static LetterTable used by BasicSimpleCharClass::isAlpha().
285 LetterTable
BasicSimpleCharClass::aLetterTable
;
287 LetterTable::LetterTable( void )
289 for( int i
= 0 ; i
< 256 ; ++i
)
290 IsLetterTab
[i
] = false;
292 IsLetterTab
[0xC0] = true; // À , CAPITAL LETTER A WITH GRAVE ACCENT
293 IsLetterTab
[0xC1] = true; // Á , CAPITAL LETTER A WITH ACUTE ACCENT
294 IsLetterTab
[0xC2] = true; // Â , CAPITAL LETTER A WITH CIRCUMFLEX ACCENT
295 IsLetterTab
[0xC3] = true; // Ã , CAPITAL LETTER A WITH TILDE
296 IsLetterTab
[0xC4] = true; // Ä , CAPITAL LETTER A WITH DIAERESIS
297 IsLetterTab
[0xC5] = true; // Å , CAPITAL LETTER A WITH RING ABOVE
298 IsLetterTab
[0xC6] = true; // Æ , CAPITAL LIGATURE AE
299 IsLetterTab
[0xC7] = true; // Ç , CAPITAL LETTER C WITH CEDILLA
300 IsLetterTab
[0xC8] = true; // È , CAPITAL LETTER E WITH GRAVE ACCENT
301 IsLetterTab
[0xC9] = true; // É , CAPITAL LETTER E WITH ACUTE ACCENT
302 IsLetterTab
[0xCA] = true; // Ê , CAPITAL LETTER E WITH CIRCUMFLEX ACCENT
303 IsLetterTab
[0xCB] = true; // Ë , CAPITAL LETTER E WITH DIAERESIS
304 IsLetterTab
[0xCC] = true; // Ì , CAPITAL LETTER I WITH GRAVE ACCENT
305 IsLetterTab
[0xCD] = true; // Í , CAPITAL LETTER I WITH ACUTE ACCENT
306 IsLetterTab
[0xCE] = true; // Î , CAPITAL LETTER I WITH CIRCUMFLEX ACCENT
307 IsLetterTab
[0xCF] = true; // Ï , CAPITAL LETTER I WITH DIAERESIS
308 IsLetterTab
[0xD0] = true; // Ð , CAPITAL LETTER ETH
309 IsLetterTab
[0xD1] = true; // Ñ , CAPITAL LETTER N WITH TILDE
310 IsLetterTab
[0xD2] = true; // Ò , CAPITAL LETTER O WITH GRAVE ACCENT
311 IsLetterTab
[0xD3] = true; // Ó , CAPITAL LETTER O WITH ACUTE ACCENT
312 IsLetterTab
[0xD4] = true; // Ô , CAPITAL LETTER O WITH CIRCUMFLEX ACCENT
313 IsLetterTab
[0xD5] = true; // Õ , CAPITAL LETTER O WITH TILDE
314 IsLetterTab
[0xD6] = true; // Ö , CAPITAL LETTER O WITH DIAERESIS
315 IsLetterTab
[0xD8] = true; // Ø , CAPITAL LETTER O WITH STROKE
316 IsLetterTab
[0xD9] = true; // Ù , CAPITAL LETTER U WITH GRAVE ACCENT
317 IsLetterTab
[0xDA] = true; // Ú , CAPITAL LETTER U WITH ACUTE ACCENT
318 IsLetterTab
[0xDB] = true; // Û , CAPITAL LETTER U WITH CIRCUMFLEX ACCENT
319 IsLetterTab
[0xDC] = true; // Ü , CAPITAL LETTER U WITH DIAERESIS
320 IsLetterTab
[0xDD] = true; // Ý , CAPITAL LETTER Y WITH ACUTE ACCENT
321 IsLetterTab
[0xDE] = true; // Þ , CAPITAL LETTER THORN
322 IsLetterTab
[0xDF] = true; // ß , SMALL LETTER SHARP S
323 IsLetterTab
[0xE0] = true; // à , SMALL LETTER A WITH GRAVE ACCENT
324 IsLetterTab
[0xE1] = true; // á , SMALL LETTER A WITH ACUTE ACCENT
325 IsLetterTab
[0xE2] = true; // â , SMALL LETTER A WITH CIRCUMFLEX ACCENT
326 IsLetterTab
[0xE3] = true; // ã , SMALL LETTER A WITH TILDE
327 IsLetterTab
[0xE4] = true; // ä , SMALL LETTER A WITH DIAERESIS
328 IsLetterTab
[0xE5] = true; // å , SMALL LETTER A WITH RING ABOVE
329 IsLetterTab
[0xE6] = true; // æ , SMALL LIGATURE AE
330 IsLetterTab
[0xE7] = true; // ç , SMALL LETTER C WITH CEDILLA
331 IsLetterTab
[0xE8] = true; // è , SMALL LETTER E WITH GRAVE ACCENT
332 IsLetterTab
[0xE9] = true; // é , SMALL LETTER E WITH ACUTE ACCENT
333 IsLetterTab
[0xEA] = true; // ê , SMALL LETTER E WITH CIRCUMFLEX ACCENT
334 IsLetterTab
[0xEB] = true; // ë , SMALL LETTER E WITH DIAERESIS
335 IsLetterTab
[0xEC] = true; // ì , SMALL LETTER I WITH GRAVE ACCENT
336 IsLetterTab
[0xED] = true; // í , SMALL LETTER I WITH ACUTE ACCENT
337 IsLetterTab
[0xEE] = true; // î , SMALL LETTER I WITH CIRCUMFLEX ACCENT
338 IsLetterTab
[0xEF] = true; // ï , SMALL LETTER I WITH DIAERESIS
339 IsLetterTab
[0xF0] = true; // ð , SMALL LETTER ETH
340 IsLetterTab
[0xF1] = true; // ñ , SMALL LETTER N WITH TILDE
341 IsLetterTab
[0xF2] = true; // ò , SMALL LETTER O WITH GRAVE ACCENT
342 IsLetterTab
[0xF3] = true; // ó , SMALL LETTER O WITH ACUTE ACCENT
343 IsLetterTab
[0xF4] = true; // ô , SMALL LETTER O WITH CIRCUMFLEX ACCENT
344 IsLetterTab
[0xF5] = true; // õ , SMALL LETTER O WITH TILDE
345 IsLetterTab
[0xF6] = true; // ö , SMALL LETTER O WITH DIAERESIS
346 IsLetterTab
[0xF8] = true; // ø , SMALL LETTER O WITH OBLIQUE BAR
347 IsLetterTab
[0xF9] = true; // ù , SMALL LETTER U WITH GRAVE ACCENT
348 IsLetterTab
[0xFA] = true; // ú , SMALL LETTER U WITH ACUTE ACCENT
349 IsLetterTab
[0xFB] = true; // û , SMALL LETTER U WITH CIRCUMFLEX ACCENT
350 IsLetterTab
[0xFC] = true; // ü , SMALL LETTER U WITH DIAERESIS
351 IsLetterTab
[0xFD] = true; // ý , SMALL LETTER Y WITH ACUTE ACCENT
352 IsLetterTab
[0xFE] = true; // þ , SMALL LETTER THORN
353 IsLetterTab
[0xFF] = true; // ÿ , SMALL LETTER Y WITH DIAERESIS
// Classifies a character through a CharClass constructed from the application locale.
// The CharClass is created on the first call and kept in a function-local static
// pointer; no matching delete is visible in this excerpt.
356 bool LetterTable::isLetterUnicode( sal_Unicode c
)
358 static CharClass
* pCharClass
= NULL
;
// Create the classifier lazily on first use.
359 if( pCharClass
== NULL
)
360 pCharClass
= new CharClass( Application::GetSettings().GetLocale() );
// NOTE(review): the declaration of aStr is not visible in this excerpt; presumably a
// String holding c — confirm.
362 bool bRet
= pCharClass
->isLetter( aStr
, 0 );
366 // Helper: test character flags
// For c in 1..255 the result is whether aCharTypeTab[c] has any bit of nTestFlags set.
// The second assignment yields BasicSimpleCharClass::isAlpha( c, true ) when nTestFlags
// contains CHAR_START_IDENTIFIER or CHAR_IN_IDENTIFIER, and false otherwise.
// NOTE(review): the declaration of bRet, the condition guarding the second assignment
// (presumably c > 255) and the return statement are not visible in this excerpt — confirm.
367 BOOL
SimpleTokenizer_Impl::testCharFlags( sal_Unicode c
, USHORT nTestFlags
)
370 if( c
!= 0 && c
<= 255 )
372 bRet
= ( (aCharTypeTab
[c
] & nTestFlags
) != 0 );
376 bRet
= (( CHAR_START_IDENTIFIER
| CHAR_IN_IDENTIFIER
) & nTestFlags
) != 0
377 ? BasicSimpleCharClass::isAlpha( c
, true ) : false;
382 void SimpleTokenizer_Impl::setKeyWords( const char** ppKeyWords
, UINT16 nCount
)
384 ppListKeyWords
= ppKeyWords
;
385 nKeyWordCount
= nCount
;
// Scans the next token beginning at mpActualPos.
//   reType     - receives the token type (TT_WHITESPACE, TT_IDENTIFIER, TT_KEYWORDS, ...)
//   rpStartPos - set to mpActualPos on entry, i.e. the first character of the token
//   rpEndPos   - set to mpActualPos after the token has been consumed
// The first character is classified with testCharFlags() and the branches below are
// tried in order: whitespace, identifier/keyword, operator/comment, object separator,
// number, string, end of line.
// NOTE(review): several statements (getChar() calls, loop bodies, return statements) are
// not visible in this excerpt; callers loop while the result is true.
389 BOOL
SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes
& reType
,
390 /*out*/const sal_Unicode
*& rpStartPos
, /*out*/const sal_Unicode
*& rpEndPos
)
395 rpStartPos
= mpActualPos
;
397 // Examine the character
398 sal_Unicode c
= peekChar();
405 //*** Go through all possibilities ***
407 if ( (testCharFlags( c
, CHAR_SPACE
) == TRUE
) )
409 while( testCharFlags( peekChar(), CHAR_SPACE
) == TRUE
)
412 reType
= TT_WHITESPACE
;
416 else if ( (testCharFlags( c
, CHAR_START_IDENTIFIER
) == TRUE
) )
418 BOOL bIdentifierChar
;
421 // Fetch the next character
423 bIdentifierChar
= testCharFlags( c
, CHAR_IN_IDENTIFIER
);
424 if( bIdentifierChar
)
427 while( bIdentifierChar
);
429 reType
= TT_IDENTIFIER
;
431 // Keyword table
432 if (ppListKeyWords
!= NULL
)
434 int nCount
= mpActualPos
- rpStartPos
;
436 // No keyword if string contains char > 255
437 bool bCanBeKeyword
= true;
438 for( int i
= 0 ; i
< nCount
; i
++ )
440 if( rpStartPos
[i
] > 255 )
442 bCanBeKeyword
= false;
// The candidate is converted to a lower-cased ASCII byte string and looked up in the
// keyword table with bsearch(); this is why the keyword tables must be lowercase.
449 String
aKWString(rpStartPos
, sal::static_int_cast
< xub_StrLen
>(nCount
) );
450 ByteString
aByteStr( aKWString
, RTL_TEXTENCODING_ASCII_US
);
451 aByteStr
.ToLowerAscii();
452 if ( bsearch( aByteStr
.GetBuffer(), ppListKeyWords
, nKeyWordCount
, sizeof( char* ),
455 reType
= TT_KEYWORDS
;
// The keyword "rem" gets special treatment: the rest of the line is consumed.
457 if ( aByteStr
.Equals( "rem" ) )
459 // Remove all characters up to end of line or EOF
460 sal_Unicode cPeek
= peekChar();
461 while( cPeek
!= CHAR_EOF
&& testCharFlags( cPeek
, CHAR_EOL
) == FALSE
)
475 // only for BASIC '\'' should be a comment, otherwise it is a normal string and handled there
476 else if ( ( testCharFlags( c
, CHAR_OPERATOR
) == TRUE
) || ( (c
== '\'') && (aLanguage
==HIGHLIGHT_BASIC
)) )
478 // parameters for SQL view
479 if ( (c
==':') || (c
=='?'))
483 BOOL bIdentifierChar
;
486 // Fetch the next character
488 bIdentifierChar
= BasicSimpleCharClass::isAlpha( c
, true );
489 if( bIdentifierChar
)
492 while( bIdentifierChar
);
494 reType
= TT_PARAMETER
;
498 sal_Unicode cPeekNext
= peekChar();
501 // Remove all characters up to end of line or EOF
502 while( cPeekNext
!= CHAR_EOF
&& testCharFlags( cPeekNext
, CHAR_EOL
) == FALSE
)
505 cPeekNext
= peekChar();
512 sal_Unicode cPeekNext
= peekChar();
515 // Remove all characters up to end of line or EOF
516 while( cPeekNext
!= CHAR_EOF
&& testCharFlags( cPeekNext
, CHAR_EOL
) == FALSE
)
519 cPeekNext
= peekChar();
529 c
= getChar(); // remove '/'
531 // Remove all characters up to end of line or EOF
532 sal_Unicode cPeek
= peekChar();
533 while( cPeek
!= CHAR_EOF
&& testCharFlags( cPeek
, CHAR_EOL
) == FALSE
)
542 // A real operator; it can be handled simply here, because the
543 // actual operator (e.g. +=) is of no interest,
544 // only the fact that it is an operator.
545 if( reType
!= TT_COMMENT
)
547 reType
= TT_OPERATOR
;
553 // Object separator? Must be handled before Number
554 else if( c
== '.' && ( peekChar() < '0' || peekChar() > '9' ) )
556 reType
= TT_OPERATOR
;
560 else if( testCharFlags( c
, CHAR_START_NUMBER
) == TRUE
)
564 // Number system, 10 = normal, changed for oct/hex
567 // Is it a hex or octal number?
571 if( peekChar() == 'o' || peekChar() == 'O' )
575 nRadix
= 8; // octal base
577 // Read all digits
578 while( testCharFlags( peekChar(), CHAR_IN_OCT_NUMBER
) )
582 else if( peekChar() == 'h' || peekChar() == 'H' )
586 nRadix
= 16; // hex base
588 // Read and buffer all digits
589 while( testCharFlags( peekChar(), CHAR_IN_HEX_NUMBER
) )
594 reType
= TT_OPERATOR
;
598 // If not oct or hex, treat as double
599 if( reType
== TT_NUMBER
&& nRadix
== 10 )
601 // Flag: whether the last character was an exponent
602 BOOL bAfterExpChar
= FALSE
;
604 // Read all digits
605 while( testCharFlags( peekChar(), CHAR_IN_NUMBER
) ||
606 (bAfterExpChar
&& peekChar() == '+' ) ||
607 (bAfterExpChar
&& peekChar() == '-' ) )
608 // After the exponent, +/- is OK too
610 c
= getChar(); // read character
611 bAfterExpChar
= ( c
== 'e' || c
== 'E' );
615 // reType = TT_NUMBER;
619 else if( testCharFlags( c
, CHAR_START_STRING
) == TRUE
)
621 // Remember which character opened the string
622 sal_Unicode cEndString
= c
;
626 // Read all characters until the closing string delimiter
627 while( peekChar() != cEndString
)
629 // #58846 Catch EOF before getChar() so that EOF is not lost
630 if( peekChar() == CHAR_EOF
)
632 // ERROR: unterminated string literal
637 if( testCharFlags( c
, CHAR_EOL
) == TRUE
)
639 // ERROR: unterminated string literal
646 if( reType
!= TT_ERROR
)
// A token closed by ']' is reported as an identifier rather than a string.
649 if( cEndString
== ']' )
650 reType
= TT_IDENTIFIER
;
657 else if( testCharFlags( c
, CHAR_EOL
) == TRUE
)
659 // If another, different EOL character follows, discard it as well
660 sal_Unicode cNext
= peekChar();
661 if( cNext
!= c
&& testCharFlags( cNext
, CHAR_EOL
) == TRUE
)
664 // Set position data to the start of the line
671 // Everything else remains TT_UNKNOWN
674 // Record the end position
675 rpEndPos
= mpActualPos
;
679 String
SimpleTokenizer_Impl::getTokStr
680 ( /*out*/const sal_Unicode
* pStartPos
, /*out*/const sal_Unicode
* pEndPos
)
682 return String( pStartPos
, (USHORT
)( pEndPos
- pStartPos
) );
686 // TEST: output token
// Builds a debug string for a token: a "TT_xxx:" prefix chosen by eType, followed by the
// token text (omitted for TT_EOL) and a trailing "\n".
// NOTE(review): the declaration of aOut, the switch header and the return statement are
// not visible in this excerpt.
687 String
SimpleTokenizer_Impl::getFullTokenStr( /*out*/TokenTypes eType
,
688 /*out*/const sal_Unicode
* pStartPos
, /*out*/const sal_Unicode
* pEndPos
)
693 case TT_UNKNOWN
: aOut
= String( RTL_CONSTASCII_USTRINGPARAM("TT_UNKNOWN:") ); break;
694 case TT_IDENTIFIER
: aOut
= String( RTL_CONSTASCII_USTRINGPARAM("TT_IDENTIFIER:") ); break;
695 case TT_WHITESPACE
: aOut
= String( RTL_CONSTASCII_USTRINGPARAM("TT_WHITESPACE:") ); break;
696 case TT_NUMBER
: aOut
= String( RTL_CONSTASCII_USTRINGPARAM("TT_NUMBER:") ); break;
697 case TT_STRING
: aOut
= String( RTL_CONSTASCII_USTRINGPARAM("TT_STRING:") ); break;
698 case TT_EOL
: aOut
= String( RTL_CONSTASCII_USTRINGPARAM("TT_EOL:") ); break;
699 case TT_COMMENT
: aOut
= String( RTL_CONSTASCII_USTRINGPARAM("TT_COMMENT:") ); break;
700 case TT_ERROR
: aOut
= String( RTL_CONSTASCII_USTRINGPARAM("TT_ERROR:") ); break;
701 case TT_OPERATOR
: aOut
= String( RTL_CONSTASCII_USTRINGPARAM("TT_OPERATOR:") ); break;
// Note: the enumerator is TT_KEYWORDS but the emitted label is "TT_KEYWORD:".
702 case TT_KEYWORDS
: aOut
= String( RTL_CONSTASCII_USTRINGPARAM("TT_KEYWORD:") ); break;
703 case TT_PARAMETER
: aOut
= String( RTL_CONSTASCII_USTRINGPARAM("TT_PARAMETER:") ); break;
// The token text itself is appended for every type except end-of-line tokens.
705 if( eType
!= TT_EOL
)
707 aOut
+= String( pStartPos
, (USHORT
)( pEndPos
- pStartPos
) );
709 aOut
+= String( RTL_CONSTASCII_USTRINGPARAM("\n") );
714 SimpleTokenizer_Impl::SimpleTokenizer_Impl( HighlighterLanguage aLang
): aLanguage(aLang
)
716 memset( aCharTypeTab
, 0, sizeof( aCharTypeTab
) );
718 // Zeichen-Tabelle fuellen
721 // Zulaessige Zeichen fuer Identifier
722 USHORT nHelpMask
= (USHORT
)( CHAR_START_IDENTIFIER
| CHAR_IN_IDENTIFIER
);
723 for( i
= 'a' ; i
<= 'z' ; i
++ )
724 aCharTypeTab
[i
] |= nHelpMask
;
725 for( i
= 'A' ; i
<= 'Z' ; i
++ )
726 aCharTypeTab
[i
] |= nHelpMask
;
727 // '_' extra eintragen
728 aCharTypeTab
[(int)'_'] |= nHelpMask
;
729 // AB 23.6.97: '$' ist auch erlaubt
730 aCharTypeTab
[(int)'$'] |= nHelpMask
;
732 // Ziffern (Identifier und Number ist moeglich)
733 nHelpMask
= (USHORT
)( CHAR_IN_IDENTIFIER
| CHAR_START_NUMBER
|
734 CHAR_IN_NUMBER
| CHAR_IN_HEX_NUMBER
);
735 for( i
= '0' ; i
<= '9' ; i
++ )
736 aCharTypeTab
[i
] |= nHelpMask
;
738 // e und E sowie . von Hand ergaenzen
739 aCharTypeTab
[(int)'e'] |= CHAR_IN_NUMBER
;
740 aCharTypeTab
[(int)'E'] |= CHAR_IN_NUMBER
;
741 aCharTypeTab
[(int)'.'] |= (USHORT
)( CHAR_IN_NUMBER
| CHAR_START_NUMBER
);
742 aCharTypeTab
[(int)'&'] |= CHAR_START_NUMBER
;
745 for( i
= 'a' ; i
<= 'f' ; i
++ )
746 aCharTypeTab
[i
] |= CHAR_IN_HEX_NUMBER
;
747 for( i
= 'A' ; i
<= 'F' ; i
++ )
748 aCharTypeTab
[i
] |= CHAR_IN_HEX_NUMBER
;
751 for( i
= '0' ; i
<= '7' ; i
++ )
752 aCharTypeTab
[i
] |= CHAR_IN_OCT_NUMBER
;
754 // String-Beginn/End-Zeichen
755 aCharTypeTab
[(int)'\''] |= CHAR_START_STRING
;
756 aCharTypeTab
[(int)'\"'] |= CHAR_START_STRING
;
757 aCharTypeTab
[(int)'['] |= CHAR_START_STRING
;
758 aCharTypeTab
[(int)'`'] |= CHAR_START_STRING
;
761 aCharTypeTab
[(int)'!'] |= CHAR_OPERATOR
;
762 aCharTypeTab
[(int)'%'] |= CHAR_OPERATOR
;
763 // aCharTypeTab[(int)'&'] |= CHAR_OPERATOR; Removed because of #i14140
764 aCharTypeTab
[(int)'('] |= CHAR_OPERATOR
;
765 aCharTypeTab
[(int)')'] |= CHAR_OPERATOR
;
766 aCharTypeTab
[(int)'*'] |= CHAR_OPERATOR
;
767 aCharTypeTab
[(int)'+'] |= CHAR_OPERATOR
;
768 aCharTypeTab
[(int)','] |= CHAR_OPERATOR
;
769 aCharTypeTab
[(int)'-'] |= CHAR_OPERATOR
;
770 aCharTypeTab
[(int)'/'] |= CHAR_OPERATOR
;
771 aCharTypeTab
[(int)':'] |= CHAR_OPERATOR
;
772 aCharTypeTab
[(int)'<'] |= CHAR_OPERATOR
;
773 aCharTypeTab
[(int)'='] |= CHAR_OPERATOR
;
774 aCharTypeTab
[(int)'>'] |= CHAR_OPERATOR
;
775 aCharTypeTab
[(int)'?'] |= CHAR_OPERATOR
;
776 aCharTypeTab
[(int)'^'] |= CHAR_OPERATOR
;
777 aCharTypeTab
[(int)'|'] |= CHAR_OPERATOR
;
778 aCharTypeTab
[(int)'~'] |= CHAR_OPERATOR
;
779 aCharTypeTab
[(int)'{'] |= CHAR_OPERATOR
;
780 aCharTypeTab
[(int)'}'] |= CHAR_OPERATOR
;
781 // aCharTypeTab[(int)'['] |= CHAR_OPERATOR; Removed because of #i17826
782 aCharTypeTab
[(int)']'] |= CHAR_OPERATOR
;
783 aCharTypeTab
[(int)';'] |= CHAR_OPERATOR
;
786 aCharTypeTab
[(int)' ' ] |= CHAR_SPACE
;
787 aCharTypeTab
[(int)'\t'] |= CHAR_SPACE
;
789 // Zeilen-Ende-Zeichen
790 aCharTypeTab
[(int)'\r'] |= CHAR_EOL
;
791 aCharTypeTab
[(int)'\n'] |= CHAR_EOL
;
793 ppListKeyWords
= NULL
;
// Destructor. NOTE(review): the body is not visible in this excerpt.
796 SimpleTokenizer_Impl::~SimpleTokenizer_Impl( void )
800 SimpleTokenizer_Impl
* getSimpleTokenizer( void )
802 static SimpleTokenizer_Impl
* pSimpleTokenizer
= NULL
;
803 if( !pSimpleTokenizer
)
804 pSimpleTokenizer
= new SimpleTokenizer_Impl();
805 return pSimpleTokenizer
;
808 // Find the respective next function in a JavaScript module
// NOTE(review): the comment above is translated as found; the visible code tokenizes
// aSource with getNextToken() and keeps a token counter, so the comment looks stale.
// NOTE(review): the declaration of eType, the loop body and the return statement are not
// visible in this excerpt.
809 UINT16
SimpleTokenizer_Impl::parseLine( UINT32 nParseLine
, const String
* aSource
)
811 // Set the position to the start of the source string
812 mpStringBegin
= mpActualPos
= aSource
->GetBuffer();
814 // Initialise line and column
818 // Variables for the out parameters
820 const sal_Unicode
* pStartPos
;
821 const sal_Unicode
* pEndPos
;
823 // Loop over all tokens
824 UINT16 nTokenCount
= 0;
825 while( getNextToken( eType
, pStartPos
, pEndPos
) )
// Tokenizes rLine and appends one HighlightPortion per token to 'portions'.
// Each portion stores the token's begin and end offsets relative to the start of rLine
// (narrowed to UINT16) together with its token type.
// NOTE(review): the declaration of eType is not visible in this excerpt.
831 void SimpleTokenizer_Impl::getHighlightPortions( UINT32 nParseLine
, const String
& rLine
,
832 /*out*/HighlightPortions
& portions
)
834 // Set the position to the start of the source string
835 mpStringBegin
= mpActualPos
= rLine
.GetBuffer();
837 // Initialise line and column
841 // Variables for the out parameters
843 const sal_Unicode
* pStartPos
;
844 const sal_Unicode
* pEndPos
;
846 // Loop over all tokens
847 while( getNextToken( eType
, pStartPos
, pEndPos
) )
849 HighlightPortion portion
;
851 portion
.nBegin
= (UINT16
)(pStartPos
- mpStringBegin
);
852 portion
.nEnd
= (UINT16
)(pEndPos
- mpStringBegin
);
853 portion
.tokenType
= eType
;
// Append at the end so the portions stay in source order.
855 portions
.Insert(portion
, portions
.Count());
860 //////////////////////////////////////////////////////////////////////////
861 // Implementation of SyntaxHighlighter
// Constructs a highlighter without a tokenizer; initialize() creates one.
// NOTE(review): the embedded line numbers show a gap after this statement, so further
// member initialisation may exist that is not visible in this excerpt.
863 SyntaxHighlighter::SyntaxHighlighter()
865 m_pSimpleTokenizer
= 0;
// Releases the tokenizer owned by this highlighter (deleting a null pointer is a no-op).
// NOTE(review): the embedded line numbers show a gap after this statement, so further
// cleanup may exist that is not visible in this excerpt.
870 SyntaxHighlighter::~SyntaxHighlighter()
872 delete m_pSimpleTokenizer
;
// (Re)initialises the highlighter for eLanguage_: stores the language, replaces any
// existing tokenizer with a new one for that language, and installs a keyword table
// (BASIC keywords, SQL keywords, or none).
876 void SyntaxHighlighter::initialize( HighlighterLanguage eLanguage_
)
878 eLanguage
= eLanguage_
;
// Discard the previous tokenizer before creating the new one.
879 delete m_pSimpleTokenizer
;
880 m_pSimpleTokenizer
= new SimpleTokenizer_Impl(eLanguage
);
// NOTE(review): the switch header and the labels of the two following branches are not
// visible in this excerpt — presumably an SQL case and a default; confirm.
884 case HIGHLIGHT_BASIC
:
// The element count is derived from the array size at compile time.
885 m_pSimpleTokenizer
->setKeyWords( strListBasicKeyWords
,
886 sizeof( strListBasicKeyWords
) / sizeof( char* ));
889 m_pSimpleTokenizer
->setKeyWords( strListSqlKeyWords
,
890 sizeof( strListSqlKeyWords
) / sizeof( char* ));
893 m_pSimpleTokenizer
->setKeyWords( NULL
, 0 );
897 const Range
SyntaxHighlighter::notifyChange( UINT32 nLine
, INT32 nLineCountDifference
,
898 const String
* pChangedLines
, UINT32 nArrayLength
)
900 (void)nLineCountDifference
;
902 for( UINT32 i
=0 ; i
< nArrayLength
; i
++ )
903 m_pSimpleTokenizer
->parseLine(nLine
+i
, &pChangedLines
[i
]);
905 return Range( nLine
, nLine
+ nArrayLength
-1 );
// Forwards to the tokenizer's getHighlightPortions() with the same arguments.
// m_pSimpleTokenizer is dereferenced without a check, so initialize() must have been
// called first.
908 void SyntaxHighlighter::getHighlightPortions( UINT32 nLine
, const String
& rLine
,
909 /*out*/HighlightPortions
& portions
)
911 m_pSimpleTokenizer
->getHighlightPortions( nLine
, rLine
, portions
);