update dev300-m57
[ooovba.git] / svtools / source / edit / syntaxhighlight.cxx
blobcad466ef9afefdfe0683df3440d26669420598cd
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: syntaxhighlight.cxx,v $
10 * $Revision: 1.1.2.4 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_svtools.hxx"
34 #include <svtools/syntaxhighlight.hxx>
36 #include <unotools/charclass.hxx>
37 #include <tools/debug.hxx>
40 SV_IMPL_VARARR(HighlightPortions, HighlightPortion)
// ##########################################################################
// ATTENTION: all these words need to be in lower case, and the list must
// stay sorted in strcmp() order: the tokenizer lower-cases each identifier
// and looks it up with bsearch() using a strcmp()-based comparator.
// ##########################################################################
static const char* strListBasicKeyWords[] = {
    "access", "alias", "and", "any", "append", "as",
    "base", "binary", "boolean", "byref", "byte", "byval",
    "call", "case", "cdecl", "classmodule", "close", "compare",
    "compatible", "const", "currency",
    "date", "declare", "defbool", "defcur", "defdate", "defdbl",
    "deferr", "defint", "deflng", "defobj", "defsng", "defstr",
    "defvar", "dim", "do", "double",
    "each", "else", "elseif", "end", "end enum", "end function",
    "end if", "end select", "end sub", "end type", "endif", "enum",
    "eqv", "erase", "error", "exit", "explicit",
    "for", "function",
    "get", "global", "gosub", "goto",
    "if", "imp", "implements", "in", "input", "integer", "is",
    "let", "lib", "like", "line", "line input", "local", "lock",
    "long", "loop", "lprint", "lset",
    "mod",
    "name", "new", "next", "not",
    "object", "on", "open", "option", "optional", "or", "output",
    "preserve", "print", "private", "property", "public",
    "random", "read", "redim", "rem", "resume", "return", "rset",
    "select", "set", "shared", "single", "static", "step", "stop",
    "string", "sub", "system",
    "text", "then", "to", "type", "typeof",
    "until",
    "variant",
    "wend", "while", "with", "write",
    "xor"
};
// SQL keywords recognised by the highlighter. Entries are lower case and the
// list must stay sorted in strcmp() order because it is searched with
// bsearch() using a strcmp()-based comparator.
static const char* strListSqlKeyWords[] = {
    "all", "and", "any", "as", "asc", "avg",
    "between", "by",
    "cast", "corresponding", "count", "create", "cross",
    "delete", "desc", "distinct", "drop",
    "escape", "except", "exists",
    "false", "from", "full",
    "global", "group",
    "having",
    "in", "inner", "insert", "intersect", "into", "is",
    "join",
    "left", "like", "local",
    "match", "max", "min",
    "natural", "not", "null",
    "on", "or", "order", "outer",
    "right",
    "select", "set", "some", "sum",
    "table", "temporary", "true",
    "union", "unique", "unknown", "update", "using",
    "values",
    "where"
};
239 extern "C" int CDECL compare_strings( const void *arg1, const void *arg2 )
241 return strcmp( (char *)arg1, *(char **)arg2 );
245 class LetterTable
247 bool IsLetterTab[256];
249 public:
250 LetterTable( void );
252 inline bool isLetter( sal_Unicode c )
254 bool bRet = (c < 256) ? IsLetterTab[c] : isLetterUnicode( c );
255 return bRet;
257 bool isLetterUnicode( sal_Unicode c );
260 class BasicSimpleCharClass
262 static LetterTable aLetterTable;
264 public:
265 static BOOL isAlpha( sal_Unicode c, bool bCompatible )
267 BOOL bRet = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
268 || (bCompatible && aLetterTable.isLetter( c ));
269 return bRet;
272 static BOOL isDigit( sal_Unicode c )
274 BOOL bRet = (c >= '0' && c <= '9');
275 return bRet;
278 static BOOL isAlphaNumeric( sal_Unicode c, bool bCompatible )
280 BOOL bRet = isDigit( c ) || isAlpha( c, bCompatible );
281 return bRet;
285 LetterTable BasicSimpleCharClass::aLetterTable;
287 LetterTable::LetterTable( void )
289 for( int i = 0 ; i < 256 ; ++i )
290 IsLetterTab[i] = false;
292 IsLetterTab[0xC0] = true; // À , CAPITAL LETTER A WITH GRAVE ACCENT
293 IsLetterTab[0xC1] = true; // Á , CAPITAL LETTER A WITH ACUTE ACCENT
294 IsLetterTab[0xC2] = true; // Â , CAPITAL LETTER A WITH CIRCUMFLEX ACCENT
295 IsLetterTab[0xC3] = true; // Ã , CAPITAL LETTER A WITH TILDE
296 IsLetterTab[0xC4] = true; // Ä , CAPITAL LETTER A WITH DIAERESIS
297 IsLetterTab[0xC5] = true; // Å , CAPITAL LETTER A WITH RING ABOVE
298 IsLetterTab[0xC6] = true; // Æ , CAPITAL LIGATURE AE
299 IsLetterTab[0xC7] = true; // Ç , CAPITAL LETTER C WITH CEDILLA
300 IsLetterTab[0xC8] = true; // È , CAPITAL LETTER E WITH GRAVE ACCENT
301 IsLetterTab[0xC9] = true; // É , CAPITAL LETTER E WITH ACUTE ACCENT
302 IsLetterTab[0xCA] = true; // Ê , CAPITAL LETTER E WITH CIRCUMFLEX ACCENT
303 IsLetterTab[0xCB] = true; // Ë , CAPITAL LETTER E WITH DIAERESIS
304 IsLetterTab[0xCC] = true; // Ì , CAPITAL LETTER I WITH GRAVE ACCENT
305 IsLetterTab[0xCD] = true; // Í , CAPITAL LETTER I WITH ACUTE ACCENT
306 IsLetterTab[0xCE] = true; // Î , CAPITAL LETTER I WITH CIRCUMFLEX ACCENT
307 IsLetterTab[0xCF] = true; // Ï , CAPITAL LETTER I WITH DIAERESIS
308 IsLetterTab[0xD0] = true; // Ð , CAPITAL LETTER ETH
309 IsLetterTab[0xD1] = true; // Ñ , CAPITAL LETTER N WITH TILDE
310 IsLetterTab[0xD2] = true; // Ò , CAPITAL LETTER O WITH GRAVE ACCENT
311 IsLetterTab[0xD3] = true; // Ó , CAPITAL LETTER O WITH ACUTE ACCENT
312 IsLetterTab[0xD4] = true; // Ô , CAPITAL LETTER O WITH CIRCUMFLEX ACCENT
313 IsLetterTab[0xD5] = true; // Õ , CAPITAL LETTER O WITH TILDE
314 IsLetterTab[0xD6] = true; // Ö , CAPITAL LETTER O WITH DIAERESIS
315 IsLetterTab[0xD8] = true; // Ø , CAPITAL LETTER O WITH STROKE
316 IsLetterTab[0xD9] = true; // Ù , CAPITAL LETTER U WITH GRAVE ACCENT
317 IsLetterTab[0xDA] = true; // Ú , CAPITAL LETTER U WITH ACUTE ACCENT
318 IsLetterTab[0xDB] = true; // Û , CAPITAL LETTER U WITH CIRCUMFLEX ACCENT
319 IsLetterTab[0xDC] = true; // Ü , CAPITAL LETTER U WITH DIAERESIS
320 IsLetterTab[0xDD] = true; // Ý , CAPITAL LETTER Y WITH ACUTE ACCENT
321 IsLetterTab[0xDE] = true; // Þ , CAPITAL LETTER THORN
322 IsLetterTab[0xDF] = true; // ß , SMALL LETTER SHARP S
323 IsLetterTab[0xE0] = true; // à , SMALL LETTER A WITH GRAVE ACCENT
324 IsLetterTab[0xE1] = true; // á , SMALL LETTER A WITH ACUTE ACCENT
325 IsLetterTab[0xE2] = true; // â , SMALL LETTER A WITH CIRCUMFLEX ACCENT
326 IsLetterTab[0xE3] = true; // ã , SMALL LETTER A WITH TILDE
327 IsLetterTab[0xE4] = true; // ä , SMALL LETTER A WITH DIAERESIS
328 IsLetterTab[0xE5] = true; // å , SMALL LETTER A WITH RING ABOVE
329 IsLetterTab[0xE6] = true; // æ , SMALL LIGATURE AE
330 IsLetterTab[0xE7] = true; // ç , SMALL LETTER C WITH CEDILLA
331 IsLetterTab[0xE8] = true; // è , SMALL LETTER E WITH GRAVE ACCENT
332 IsLetterTab[0xE9] = true; // é , SMALL LETTER E WITH ACUTE ACCENT
333 IsLetterTab[0xEA] = true; // ê , SMALL LETTER E WITH CIRCUMFLEX ACCENT
334 IsLetterTab[0xEB] = true; // ë , SMALL LETTER E WITH DIAERESIS
335 IsLetterTab[0xEC] = true; // ì , SMALL LETTER I WITH GRAVE ACCENT
336 IsLetterTab[0xED] = true; // í , SMALL LETTER I WITH ACUTE ACCENT
337 IsLetterTab[0xEE] = true; // î , SMALL LETTER I WITH CIRCUMFLEX ACCENT
338 IsLetterTab[0xEF] = true; // ï , SMALL LETTER I WITH DIAERESIS
339 IsLetterTab[0xF0] = true; // ð , SMALL LETTER ETH
340 IsLetterTab[0xF1] = true; // ñ , SMALL LETTER N WITH TILDE
341 IsLetterTab[0xF2] = true; // ò , SMALL LETTER O WITH GRAVE ACCENT
342 IsLetterTab[0xF3] = true; // ó , SMALL LETTER O WITH ACUTE ACCENT
343 IsLetterTab[0xF4] = true; // ô , SMALL LETTER O WITH CIRCUMFLEX ACCENT
344 IsLetterTab[0xF5] = true; // õ , SMALL LETTER O WITH TILDE
345 IsLetterTab[0xF6] = true; // ö , SMALL LETTER O WITH DIAERESIS
346 IsLetterTab[0xF8] = true; // ø , SMALL LETTER O WITH OBLIQUE BAR
347 IsLetterTab[0xF9] = true; // ù , SMALL LETTER U WITH GRAVE ACCENT
348 IsLetterTab[0xFA] = true; // ú , SMALL LETTER U WITH ACUTE ACCENT
349 IsLetterTab[0xFB] = true; // û , SMALL LETTER U WITH CIRCUMFLEX ACCENT
350 IsLetterTab[0xFC] = true; // ü , SMALL LETTER U WITH DIAERESIS
351 IsLetterTab[0xFD] = true; // ý , SMALL LETTER Y WITH ACUTE ACCENT
352 IsLetterTab[0xFE] = true; // þ , SMALL LETTER THORN
353 IsLetterTab[0xFF] = true; // ÿ , SMALL LETTER Y WITH DIAERESIS
356 bool LetterTable::isLetterUnicode( sal_Unicode c )
358 static CharClass* pCharClass = NULL;
359 if( pCharClass == NULL )
360 pCharClass = new CharClass( Application::GetSettings().GetLocale() );
361 String aStr( c );
362 bool bRet = pCharClass->isLetter( aStr, 0 );
363 return bRet;
366 // Hilfsfunktion: Zeichen-Flag Testen
367 BOOL SimpleTokenizer_Impl::testCharFlags( sal_Unicode c, USHORT nTestFlags )
369 bool bRet = false;
370 if( c != 0 && c <= 255 )
372 bRet = ( (aCharTypeTab[c] & nTestFlags) != 0 );
374 else if( c > 255 )
376 bRet = (( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER ) & nTestFlags) != 0
377 ? BasicSimpleCharClass::isAlpha( c, true ) : false;
379 return bRet;
382 void SimpleTokenizer_Impl::setKeyWords( const char** ppKeyWords, UINT16 nCount )
384 ppListKeyWords = ppKeyWords;
385 nKeyWordCount = nCount;
388 // Neues Token holen
389 BOOL SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType,
390 /*out*/const sal_Unicode*& rpStartPos, /*out*/const sal_Unicode*& rpEndPos )
392 reType = TT_UNKNOWN;
394 // Position merken
395 rpStartPos = mpActualPos;
397 // Zeichen untersuchen
398 sal_Unicode c = peekChar();
399 if( c == CHAR_EOF )
400 return FALSE;
402 // Zeichen lesen
403 getChar();
405 //*** Alle Moeglichkeiten durchgehen ***
406 // Space?
407 if ( (testCharFlags( c, CHAR_SPACE ) == TRUE) )
409 while( testCharFlags( peekChar(), CHAR_SPACE ) == TRUE )
410 getChar();
412 reType = TT_WHITESPACE;
415 // Identifier?
416 else if ( (testCharFlags( c, CHAR_START_IDENTIFIER ) == TRUE) )
418 BOOL bIdentifierChar;
421 // Naechstes Zeichen holen
422 c = peekChar();
423 bIdentifierChar = testCharFlags( c, CHAR_IN_IDENTIFIER );
424 if( bIdentifierChar )
425 getChar();
427 while( bIdentifierChar );
429 reType = TT_IDENTIFIER;
431 // Schluesselwort-Tabelle
432 if (ppListKeyWords != NULL)
434 int nCount = mpActualPos - rpStartPos;
436 // No keyword if string contains char > 255
437 bool bCanBeKeyword = true;
438 for( int i = 0 ; i < nCount ; i++ )
440 if( rpStartPos[i] > 255 )
442 bCanBeKeyword = false;
443 break;
447 if( bCanBeKeyword )
449 String aKWString(rpStartPos, sal::static_int_cast< xub_StrLen >(nCount) );
450 ByteString aByteStr( aKWString, RTL_TEXTENCODING_ASCII_US );
451 aByteStr.ToLowerAscii();
452 if ( bsearch( aByteStr.GetBuffer(), ppListKeyWords, nKeyWordCount, sizeof( char* ),
453 compare_strings ) )
455 reType = TT_KEYWORDS;
457 if ( aByteStr.Equals( "rem" ) )
459 // Alle Zeichen bis Zeilen-Ende oder EOF entfernen
460 sal_Unicode cPeek = peekChar();
461 while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == FALSE )
463 c = getChar();
464 cPeek = peekChar();
467 reType = TT_COMMENT;
474 // Operator?
475 // only for BASIC '\'' should be a comment, otherwise it is a normal string and handled there
476 else if ( ( testCharFlags( c, CHAR_OPERATOR ) == TRUE ) || ( (c == '\'') && (aLanguage==HIGHLIGHT_BASIC)) )
478 // paramters for SQL view
479 if ( (c==':') || (c=='?'))
481 if (c!='?')
483 BOOL bIdentifierChar;
486 // Naechstes Zeichen holen
487 c = peekChar();
488 bIdentifierChar = BasicSimpleCharClass::isAlpha( c, true );
489 if( bIdentifierChar )
490 getChar();
492 while( bIdentifierChar );
494 reType = TT_PARAMETER;
496 else if ((c=='-'))
498 sal_Unicode cPeekNext = peekChar();
499 if (cPeekNext=='-')
501 // Alle Zeichen bis Zeilen-Ende oder EOF entfernen
502 while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == FALSE )
504 getChar();
505 cPeekNext = peekChar();
507 reType = TT_COMMENT;
510 else if (c=='/')
512 sal_Unicode cPeekNext = peekChar();
513 if (cPeekNext=='/')
515 // Alle Zeichen bis Zeilen-Ende oder EOF entfernen
516 while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == FALSE )
518 getChar();
519 cPeekNext = peekChar();
521 reType = TT_COMMENT;
524 else
526 // Kommentar ?
527 if ( c == '\'' )
529 c = getChar(); // '/' entfernen
531 // Alle Zeichen bis Zeilen-Ende oder EOF entfernen
532 sal_Unicode cPeek = peekChar();
533 while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == FALSE )
535 getChar();
536 cPeek = peekChar();
539 reType = TT_COMMENT;
542 // Echter Operator, kann hier einfach behandelt werden,
543 // da nicht der wirkliche Operator, wie z.B. += interessiert,
544 // sondern nur die Tatsache, dass es sich um einen handelt.
545 if( reType != TT_COMMENT )
547 reType = TT_OPERATOR;
553 // Objekt-Trenner? Muss vor Number abgehandelt werden
554 else if( c == '.' && ( peekChar() < '0' || peekChar() > '9' ) )
556 reType = TT_OPERATOR;
559 // Zahl?
560 else if( testCharFlags( c, CHAR_START_NUMBER ) == TRUE )
562 reType = TT_NUMBER;
564 // Zahlensystem, 10 = normal, wird bei Oct/Hex geaendert
565 int nRadix = 10;
567 // Ist es eine Hex- oder Oct-Zahl?
568 if( c == '&' )
570 // Octal?
571 if( peekChar() == 'o' || peekChar() == 'O' )
573 // o entfernen
574 getChar();
575 nRadix = 8; // Octal-Basis
577 // Alle Ziffern einlesen
578 while( testCharFlags( peekChar(), CHAR_IN_OCT_NUMBER ) )
579 c = getChar();
581 // Hex?
582 else if( peekChar() == 'h' || peekChar() == 'H' )
584 // x entfernen
585 getChar();
586 nRadix = 16; // Hex-Basis
588 // Alle Ziffern einlesen und puffern
589 while( testCharFlags( peekChar(), CHAR_IN_HEX_NUMBER ) )
590 c = getChar();
592 else
594 reType = TT_OPERATOR;
598 // Wenn nicht Oct oder Hex als double ansehen
599 if( reType == TT_NUMBER && nRadix == 10 )
601 // Flag, ob das letzte Zeichen ein Exponent war
602 BOOL bAfterExpChar = FALSE;
604 // Alle Ziffern einlesen
605 while( testCharFlags( peekChar(), CHAR_IN_NUMBER ) ||
606 (bAfterExpChar && peekChar() == '+' ) ||
607 (bAfterExpChar && peekChar() == '-' ) )
608 // Nach Exponent auch +/- OK
610 c = getChar(); // Zeichen lesen
611 bAfterExpChar = ( c == 'e' || c == 'E' );
615 // reType = TT_NUMBER;
618 // String?
619 else if( testCharFlags( c, CHAR_START_STRING ) == TRUE )
621 // Merken, welches Zeichen den String eroeffnet hat
622 sal_Unicode cEndString = c;
623 if( c == '[' )
624 cEndString = ']';
626 // Alle Ziffern einlesen und puffern
627 while( peekChar() != cEndString )
629 // #58846 EOF vor getChar() abfangen, damit EOF micht verloren geht
630 if( peekChar() == CHAR_EOF )
632 // ERROR: unterminated string literal
633 reType = TT_ERROR;
634 break;
636 c = getChar();
637 if( testCharFlags( c, CHAR_EOL ) == TRUE )
639 // ERROR: unterminated string literal
640 reType = TT_ERROR;
641 break;
645 // Zeichen lesen
646 if( reType != TT_ERROR )
648 getChar();
649 if( cEndString == ']' )
650 reType = TT_IDENTIFIER;
651 else
652 reType = TT_STRING;
656 // Zeilenende?
657 else if( testCharFlags( c, CHAR_EOL ) == TRUE )
659 // Falls ein weiteres anderes EOL-Char folgt, weg damit
660 sal_Unicode cNext = peekChar();
661 if( cNext != c && testCharFlags( cNext, CHAR_EOL ) == TRUE )
662 getChar();
664 // Positions-Daten auf Zeilen-Beginn setzen
665 nCol = 0;
666 nLine++;
668 reType = TT_EOL;
671 // Alles andere bleibt TT_UNKNOWN
674 // End-Position eintragen
675 rpEndPos = mpActualPos;
676 return TRUE;
679 String SimpleTokenizer_Impl::getTokStr
680 ( /*out*/const sal_Unicode* pStartPos, /*out*/const sal_Unicode* pEndPos )
682 return String( pStartPos, (USHORT)( pEndPos - pStartPos ) );
685 #ifndef PRODUCT
686 // TEST: Token ausgeben
687 String SimpleTokenizer_Impl::getFullTokenStr( /*out*/TokenTypes eType,
688 /*out*/const sal_Unicode* pStartPos, /*out*/const sal_Unicode* pEndPos )
690 String aOut;
691 switch( eType )
693 case TT_UNKNOWN: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_UNKNOWN:") ); break;
694 case TT_IDENTIFIER: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_IDENTIFIER:") ); break;
695 case TT_WHITESPACE: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_WHITESPACE:") ); break;
696 case TT_NUMBER: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_NUMBER:") ); break;
697 case TT_STRING: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_STRING:") ); break;
698 case TT_EOL: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_EOL:") ); break;
699 case TT_COMMENT: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_COMMENT:") ); break;
700 case TT_ERROR: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_ERROR:") ); break;
701 case TT_OPERATOR: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_OPERATOR:") ); break;
702 case TT_KEYWORDS: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_KEYWORD:") ); break;
703 case TT_PARAMETER: aOut = String( RTL_CONSTASCII_USTRINGPARAM("TT_PARAMETER:") ); break;
705 if( eType != TT_EOL )
707 aOut += String( pStartPos, (USHORT)( pEndPos - pStartPos ) );
709 aOut += String( RTL_CONSTASCII_USTRINGPARAM("\n") );
710 return aOut;
712 #endif
714 SimpleTokenizer_Impl::SimpleTokenizer_Impl( HighlighterLanguage aLang ): aLanguage(aLang)
716 memset( aCharTypeTab, 0, sizeof( aCharTypeTab ) );
718 // Zeichen-Tabelle fuellen
719 USHORT i;
721 // Zulaessige Zeichen fuer Identifier
722 USHORT nHelpMask = (USHORT)( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER );
723 for( i = 'a' ; i <= 'z' ; i++ )
724 aCharTypeTab[i] |= nHelpMask;
725 for( i = 'A' ; i <= 'Z' ; i++ )
726 aCharTypeTab[i] |= nHelpMask;
727 // '_' extra eintragen
728 aCharTypeTab[(int)'_'] |= nHelpMask;
729 // AB 23.6.97: '$' ist auch erlaubt
730 aCharTypeTab[(int)'$'] |= nHelpMask;
732 // Ziffern (Identifier und Number ist moeglich)
733 nHelpMask = (USHORT)( CHAR_IN_IDENTIFIER | CHAR_START_NUMBER |
734 CHAR_IN_NUMBER | CHAR_IN_HEX_NUMBER );
735 for( i = '0' ; i <= '9' ; i++ )
736 aCharTypeTab[i] |= nHelpMask;
738 // e und E sowie . von Hand ergaenzen
739 aCharTypeTab[(int)'e'] |= CHAR_IN_NUMBER;
740 aCharTypeTab[(int)'E'] |= CHAR_IN_NUMBER;
741 aCharTypeTab[(int)'.'] |= (USHORT)( CHAR_IN_NUMBER | CHAR_START_NUMBER );
742 aCharTypeTab[(int)'&'] |= CHAR_START_NUMBER;
744 // Hex-Ziffern
745 for( i = 'a' ; i <= 'f' ; i++ )
746 aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER;
747 for( i = 'A' ; i <= 'F' ; i++ )
748 aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER;
750 // Oct-Ziffern
751 for( i = '0' ; i <= '7' ; i++ )
752 aCharTypeTab[i] |= CHAR_IN_OCT_NUMBER;
754 // String-Beginn/End-Zeichen
755 aCharTypeTab[(int)'\''] |= CHAR_START_STRING;
756 aCharTypeTab[(int)'\"'] |= CHAR_START_STRING;
757 aCharTypeTab[(int)'['] |= CHAR_START_STRING;
758 aCharTypeTab[(int)'`'] |= CHAR_START_STRING;
760 // Operator-Zeichen
761 aCharTypeTab[(int)'!'] |= CHAR_OPERATOR;
762 aCharTypeTab[(int)'%'] |= CHAR_OPERATOR;
763 // aCharTypeTab[(int)'&'] |= CHAR_OPERATOR; Removed because of #i14140
764 aCharTypeTab[(int)'('] |= CHAR_OPERATOR;
765 aCharTypeTab[(int)')'] |= CHAR_OPERATOR;
766 aCharTypeTab[(int)'*'] |= CHAR_OPERATOR;
767 aCharTypeTab[(int)'+'] |= CHAR_OPERATOR;
768 aCharTypeTab[(int)','] |= CHAR_OPERATOR;
769 aCharTypeTab[(int)'-'] |= CHAR_OPERATOR;
770 aCharTypeTab[(int)'/'] |= CHAR_OPERATOR;
771 aCharTypeTab[(int)':'] |= CHAR_OPERATOR;
772 aCharTypeTab[(int)'<'] |= CHAR_OPERATOR;
773 aCharTypeTab[(int)'='] |= CHAR_OPERATOR;
774 aCharTypeTab[(int)'>'] |= CHAR_OPERATOR;
775 aCharTypeTab[(int)'?'] |= CHAR_OPERATOR;
776 aCharTypeTab[(int)'^'] |= CHAR_OPERATOR;
777 aCharTypeTab[(int)'|'] |= CHAR_OPERATOR;
778 aCharTypeTab[(int)'~'] |= CHAR_OPERATOR;
779 aCharTypeTab[(int)'{'] |= CHAR_OPERATOR;
780 aCharTypeTab[(int)'}'] |= CHAR_OPERATOR;
781 // aCharTypeTab[(int)'['] |= CHAR_OPERATOR; Removed because of #i17826
782 aCharTypeTab[(int)']'] |= CHAR_OPERATOR;
783 aCharTypeTab[(int)';'] |= CHAR_OPERATOR;
785 // Space
786 aCharTypeTab[(int)' ' ] |= CHAR_SPACE;
787 aCharTypeTab[(int)'\t'] |= CHAR_SPACE;
789 // Zeilen-Ende-Zeichen
790 aCharTypeTab[(int)'\r'] |= CHAR_EOL;
791 aCharTypeTab[(int)'\n'] |= CHAR_EOL;
793 ppListKeyWords = NULL;
796 SimpleTokenizer_Impl::~SimpleTokenizer_Impl( void )
800 SimpleTokenizer_Impl* getSimpleTokenizer( void )
802 static SimpleTokenizer_Impl* pSimpleTokenizer = NULL;
803 if( !pSimpleTokenizer )
804 pSimpleTokenizer = new SimpleTokenizer_Impl();
805 return pSimpleTokenizer;
808 // Heraussuchen der jeweils naechsten Funktion aus einem JavaScript-Modul
809 UINT16 SimpleTokenizer_Impl::parseLine( UINT32 nParseLine, const String* aSource )
811 // Position auf den Anfang des Source-Strings setzen
812 mpStringBegin = mpActualPos = aSource->GetBuffer();
814 // Zeile und Spalte initialisieren
815 nLine = nParseLine;
816 nCol = 0L;
818 // Variablen fuer die Out-Parameter
819 TokenTypes eType;
820 const sal_Unicode* pStartPos;
821 const sal_Unicode* pEndPos;
823 // Schleife ueber alle Tokens
824 UINT16 nTokenCount = 0;
825 while( getNextToken( eType, pStartPos, pEndPos ) )
826 nTokenCount++;
828 return nTokenCount;
831 void SimpleTokenizer_Impl::getHighlightPortions( UINT32 nParseLine, const String& rLine,
832 /*out*/HighlightPortions& portions )
834 // Position auf den Anfang des Source-Strings setzen
835 mpStringBegin = mpActualPos = rLine.GetBuffer();
837 // Zeile und Spalte initialisieren
838 nLine = nParseLine;
839 nCol = 0L;
841 // Variablen fuer die Out-Parameter
842 TokenTypes eType;
843 const sal_Unicode* pStartPos;
844 const sal_Unicode* pEndPos;
846 // Schleife ueber alle Tokens
847 while( getNextToken( eType, pStartPos, pEndPos ) )
849 HighlightPortion portion;
851 portion.nBegin = (UINT16)(pStartPos - mpStringBegin);
852 portion.nEnd = (UINT16)(pEndPos - mpStringBegin);
853 portion.tokenType = eType;
855 portions.Insert(portion, portions.Count());
860 //////////////////////////////////////////////////////////////////////////
861 // Implementierung des SyntaxHighlighter
863 SyntaxHighlighter::SyntaxHighlighter()
865 m_pSimpleTokenizer = 0;
866 m_pKeyWords = NULL;
867 m_nKeyWordCount = 0;
870 SyntaxHighlighter::~SyntaxHighlighter()
872 delete m_pSimpleTokenizer;
873 delete m_pKeyWords;
876 void SyntaxHighlighter::initialize( HighlighterLanguage eLanguage_ )
878 eLanguage = eLanguage_;
879 delete m_pSimpleTokenizer;
880 m_pSimpleTokenizer = new SimpleTokenizer_Impl(eLanguage);
882 switch (eLanguage)
884 case HIGHLIGHT_BASIC:
885 m_pSimpleTokenizer->setKeyWords( strListBasicKeyWords,
886 sizeof( strListBasicKeyWords ) / sizeof( char* ));
887 break;
888 case HIGHLIGHT_SQL:
889 m_pSimpleTokenizer->setKeyWords( strListSqlKeyWords,
890 sizeof( strListSqlKeyWords ) / sizeof( char* ));
891 break;
892 default:
893 m_pSimpleTokenizer->setKeyWords( NULL, 0 );
897 const Range SyntaxHighlighter::notifyChange( UINT32 nLine, INT32 nLineCountDifference,
898 const String* pChangedLines, UINT32 nArrayLength)
900 (void)nLineCountDifference;
902 for( UINT32 i=0 ; i < nArrayLength ; i++ )
903 m_pSimpleTokenizer->parseLine(nLine+i, &pChangedLines[i]);
905 return Range( nLine, nLine + nArrayLength-1 );
908 void SyntaxHighlighter::getHighlightPortions( UINT32 nLine, const String& rLine,
909 /*out*/HighlightPortions& portions )
911 m_pSimpleTokenizer->getHighlightPortions( nLine, rLine, portions );