Version 4.0.2.1, tag libreoffice-4.0.2.1
[LibreOffice.git] / svtools / source / edit / syntaxhighlight.cxx
blob7db7b722b660cd866ce5dff491fea99da0aaaeec
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
#include <svtools/syntaxhighlight.hxx>

#include <stdlib.h>
#include <string.h>

#include <unotools/charclass.hxx>
#include <comphelper/string.hxx>
// ##########################################################################
// ATTENTION: every entry must be written in lower case, and the table must
// stay sorted in ascending strcmp() order: getNextToken() lower-cases the
// scanned identifier and looks it up in this table with bsearch().
//
// Entries containing a blank ("end if", "line input", ...) can never be
// matched by that lookup, because a blank is not an identifier character;
// they are kept for reference only.
// ##########################################################################
static const char* strListBasicKeyWords[] = {
    "access",
    "alias",
    "and",
    "any",
    "append",
    "as",
    "attribute",
    "base",
    "binary",
    "boolean",
    "byref",
    "byte",
    "byval",
    "call",
    "case",
    "cdecl",
    "classmodule",
    "close",
    "compare",
    "compatible",
    "const",
    "currency",
    "date",
    "declare",
    "defbool",
    "defcur",
    "defdate",
    "defdbl",
    "deferr",
    "defint",
    "deflng",
    "defobj",
    "defsng",
    "defstr",
    "defvar",
    "dim",
    "do",
    "double",
    "each",
    "else",
    "elseif",
    "end",
    "end enum",
    "end function",
    "end if",
    "end property",
    "end select",
    "end sub",
    "end type",
    "endif",
    "enum",
    "eqv",
    "erase",
    "error",
    "exit",
    "explicit",
    "for",
    "function",
    "get",
    "global",
    "gosub",
    "goto",
    "if",
    "imp",
    "implements",
    "in",
    "input",
    "integer",
    "is",
    "let",
    "lib",
    "like",
    "line",
    "line input",
    "local",
    "lock",
    "long",
    "loop",
    "lprint",
    "lset",
    "mod",
    "name",
    "new",
    "next",
    "not",
    "object",
    "on",
    "open",
    "option",
    "optional",
    "or",
    "output",
    "paramarray",
    "preserve",
    "print",
    "private",
    "property",
    "public",
    "random",
    "read",
    "redim",
    "rem",
    "resume",
    "return",
    "rset",
    "select",
    "set",
    "shared",
    "single",
    "static",
    "step",
    "stop",
    "string",
    "sub",
    "system",
    "text",
    "then",
    "to",
    "type",
    "typeof",
    "until",
    "variant",
    "vbasupport",
    "wend",
    "while",
    "with",
    "withevents",   // the Basic keyword is "WithEvents" (was misspelled "withevent")
    "write",
    "xor"
};
// SQL keywords recognised by the highlighter.
// ATTENTION: every entry must be written in lower case, and the table must
// stay sorted in ascending strcmp() order: getNextToken() lower-cases the
// scanned identifier and looks it up in this table with bsearch().
static const char* strListSqlKeyWords[] = {
    "all",
    "and",
    "any",
    "as",
    "asc",
    "avg",
    "between",
    "by",
    "cast",
    "corresponding",
    "count",
    "create",
    "cross",
    "delete",
    "desc",
    "distinct",
    "drop",
    "escape",
    "except",
    "exists",
    "false",
    "from",
    "full",
    "global",
    "group",
    "having",
    "in",
    "inner",
    "insert",
    "intersect",
    "into",
    "is",
    "join",
    "left",
    "like",
    "local",
    "match",
    "max",
    "min",
    "natural",
    "not",
    "null",
    "on",
    "or",
    "order",
    "outer",
    "right",
    "select",
    "set",
    "some",
    "sum",
    "table",
    "temporary",
    "true",
    "union",
    "unique",
    "unknown",
    "update",
    "using",
    "values",
    "where"
};
227 extern "C" int CDECL compare_strings( const void *arg1, const void *arg2 )
229 return strcmp( (char *)arg1, *(char **)arg2 );
233 namespace
236 class LetterTable
238 bool IsLetterTab[256];
240 public:
241 LetterTable( void );
243 inline bool isLetter( sal_Unicode c )
245 bool bRet = (c < 256) ? IsLetterTab[c] : isLetterUnicode( c );
246 return bRet;
248 bool isLetterUnicode( sal_Unicode c );
251 static bool isAlpha(sal_Unicode c)
253 if (comphelper::string::isalphaAscii(c))
254 return true;
255 static LetterTable aLetterTable;
256 return aLetterTable.isLetter(c);
260 LetterTable::LetterTable( void )
262 for( int i = 0 ; i < 256 ; ++i )
263 IsLetterTab[i] = false;
265 IsLetterTab[0xC0] = true; // ?, CAPITAL LETTER A WITH GRAVE ACCENT
266 IsLetterTab[0xC1] = true; // ?, CAPITAL LETTER A WITH ACUTE ACCENT
267 IsLetterTab[0xC2] = true; // ?, CAPITAL LETTER A WITH CIRCUMFLEX ACCENT
268 IsLetterTab[0xC3] = true; // ?, CAPITAL LETTER A WITH TILDE
269 IsLetterTab[0xC4] = true; // ?, CAPITAL LETTER A WITH DIAERESIS
270 IsLetterTab[0xC5] = true; // ?, CAPITAL LETTER A WITH RING ABOVE
271 IsLetterTab[0xC6] = true; // ?, CAPITAL LIGATURE AE
272 IsLetterTab[0xC7] = true; // ?, CAPITAL LETTER C WITH CEDILLA
273 IsLetterTab[0xC8] = true; // ?, CAPITAL LETTER E WITH GRAVE ACCENT
274 IsLetterTab[0xC9] = true; // ?, CAPITAL LETTER E WITH ACUTE ACCENT
275 IsLetterTab[0xCA] = true; // ?, CAPITAL LETTER E WITH CIRCUMFLEX ACCENT
276 IsLetterTab[0xCB] = true; // ?, CAPITAL LETTER E WITH DIAERESIS
277 IsLetterTab[0xCC] = true; // ?, CAPITAL LETTER I WITH GRAVE ACCENT
278 IsLetterTab[0xCD] = true; // ?, CAPITAL LETTER I WITH ACUTE ACCENT
279 IsLetterTab[0xCE] = true; // ?, CAPITAL LETTER I WITH CIRCUMFLEX ACCENT
280 IsLetterTab[0xCF] = true; // ?, CAPITAL LETTER I WITH DIAERESIS
281 IsLetterTab[0xD0] = true; // ?, CAPITAL LETTER ETH
282 IsLetterTab[0xD1] = true; // ?, CAPITAL LETTER N WITH TILDE
283 IsLetterTab[0xD2] = true; // ?, CAPITAL LETTER O WITH GRAVE ACCENT
284 IsLetterTab[0xD3] = true; // ?, CAPITAL LETTER O WITH ACUTE ACCENT
285 IsLetterTab[0xD4] = true; // ?, CAPITAL LETTER O WITH CIRCUMFLEX ACCENT
286 IsLetterTab[0xD5] = true; // ?, CAPITAL LETTER O WITH TILDE
287 IsLetterTab[0xD6] = true; // ?, CAPITAL LETTER O WITH DIAERESIS
288 IsLetterTab[0xD8] = true; // ?, CAPITAL LETTER O WITH STROKE
289 IsLetterTab[0xD9] = true; // ?, CAPITAL LETTER U WITH GRAVE ACCENT
290 IsLetterTab[0xDA] = true; // ?, CAPITAL LETTER U WITH ACUTE ACCENT
291 IsLetterTab[0xDB] = true; // ?, CAPITAL LETTER U WITH CIRCUMFLEX ACCENT
292 IsLetterTab[0xDC] = true; // ?, CAPITAL LETTER U WITH DIAERESIS
293 IsLetterTab[0xDD] = true; // ?, CAPITAL LETTER Y WITH ACUTE ACCENT
294 IsLetterTab[0xDE] = true; // ?, CAPITAL LETTER THORN
295 IsLetterTab[0xDF] = true; // ?, SMALL LETTER SHARP S
296 IsLetterTab[0xE0] = true; // ?, SMALL LETTER A WITH GRAVE ACCENT
297 IsLetterTab[0xE1] = true; // ?, SMALL LETTER A WITH ACUTE ACCENT
298 IsLetterTab[0xE2] = true; // ?, SMALL LETTER A WITH CIRCUMFLEX ACCENT
299 IsLetterTab[0xE3] = true; // ?, SMALL LETTER A WITH TILDE
300 IsLetterTab[0xE4] = true; // ?, SMALL LETTER A WITH DIAERESIS
301 IsLetterTab[0xE5] = true; // ?, SMALL LETTER A WITH RING ABOVE
302 IsLetterTab[0xE6] = true; // ?, SMALL LIGATURE AE
303 IsLetterTab[0xE7] = true; // ?, SMALL LETTER C WITH CEDILLA
304 IsLetterTab[0xE8] = true; // ?, SMALL LETTER E WITH GRAVE ACCENT
305 IsLetterTab[0xE9] = true; // ?, SMALL LETTER E WITH ACUTE ACCENT
306 IsLetterTab[0xEA] = true; // ?, SMALL LETTER E WITH CIRCUMFLEX ACCENT
307 IsLetterTab[0xEB] = true; // ?, SMALL LETTER E WITH DIAERESIS
308 IsLetterTab[0xEC] = true; // ?, SMALL LETTER I WITH GRAVE ACCENT
309 IsLetterTab[0xED] = true; // ?, SMALL LETTER I WITH ACUTE ACCENT
310 IsLetterTab[0xEE] = true; // ?, SMALL LETTER I WITH CIRCUMFLEX ACCENT
311 IsLetterTab[0xEF] = true; // ?, SMALL LETTER I WITH DIAERESIS
312 IsLetterTab[0xF0] = true; // ?, SMALL LETTER ETH
313 IsLetterTab[0xF1] = true; // ?, SMALL LETTER N WITH TILDE
314 IsLetterTab[0xF2] = true; // ?, SMALL LETTER O WITH GRAVE ACCENT
315 IsLetterTab[0xF3] = true; // ?, SMALL LETTER O WITH ACUTE ACCENT
316 IsLetterTab[0xF4] = true; // ?, SMALL LETTER O WITH CIRCUMFLEX ACCENT
317 IsLetterTab[0xF5] = true; // ?, SMALL LETTER O WITH TILDE
318 IsLetterTab[0xF6] = true; // ?, SMALL LETTER O WITH DIAERESIS
319 IsLetterTab[0xF8] = true; // ?, SMALL LETTER O WITH OBLIQUE BAR
320 IsLetterTab[0xF9] = true; // ?, SMALL LETTER U WITH GRAVE ACCENT
321 IsLetterTab[0xFA] = true; // ?, SMALL LETTER U WITH ACUTE ACCENT
322 IsLetterTab[0xFB] = true; // ?, SMALL LETTER U WITH CIRCUMFLEX ACCENT
323 IsLetterTab[0xFC] = true; // ?, SMALL LETTER U WITH DIAERESIS
324 IsLetterTab[0xFD] = true; // ?, SMALL LETTER Y WITH ACUTE ACCENT
325 IsLetterTab[0xFE] = true; // ?, SMALL LETTER THORN
326 IsLetterTab[0xFF] = true; // � , SMALL LETTER Y WITH DIAERESIS
329 bool LetterTable::isLetterUnicode( sal_Unicode c )
331 static CharClass* pCharClass = NULL;
332 if( pCharClass == NULL )
333 pCharClass = new CharClass( Application::GetSettings().GetLanguageTag() );
334 rtl::OUString aStr( c );
335 bool bRet = pCharClass->isLetter( aStr, 0 );
336 return bRet;
339 // Hilfsfunktion: Zeichen-Flag Testen
340 sal_Bool SimpleTokenizer_Impl::testCharFlags( sal_Unicode c, sal_uInt16 nTestFlags )
342 bool bRet = false;
343 if( c != 0 && c <= 255 )
345 bRet = ( (aCharTypeTab[c] & nTestFlags) != 0 );
347 else if( c > 255 )
349 bRet = (( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER ) & nTestFlags) != 0
350 ? isAlpha(c) : false;
352 return bRet;
355 void SimpleTokenizer_Impl::setKeyWords( const char** ppKeyWords, sal_uInt16 nCount )
357 ppListKeyWords = ppKeyWords;
358 nKeyWordCount = nCount;
361 // Neues Token holen
362 sal_Bool SimpleTokenizer_Impl::getNextToken( /*out*/TokenTypes& reType,
363 /*out*/const sal_Unicode*& rpStartPos, /*out*/const sal_Unicode*& rpEndPos )
365 reType = TT_UNKNOWN;
367 // Position merken
368 rpStartPos = mpActualPos;
370 // Zeichen untersuchen
371 sal_Unicode c = peekChar();
372 if( c == CHAR_EOF )
373 return sal_False;
375 // Zeichen lesen
376 getChar();
378 //*** Alle Moeglichkeiten durchgehen ***
379 // Space?
380 if ( (testCharFlags( c, CHAR_SPACE ) == sal_True) )
382 while( testCharFlags( peekChar(), CHAR_SPACE ) == sal_True )
383 getChar();
385 reType = TT_WHITESPACE;
388 // Identifier?
389 else if ( (testCharFlags( c, CHAR_START_IDENTIFIER ) == sal_True) )
391 sal_Bool bIdentifierChar;
394 // Naechstes Zeichen holen
395 c = peekChar();
396 bIdentifierChar = testCharFlags( c, CHAR_IN_IDENTIFIER );
397 if( bIdentifierChar )
398 getChar();
400 while( bIdentifierChar );
402 reType = TT_IDENTIFIER;
404 // Schluesselwort-Tabelle
405 if (ppListKeyWords != NULL)
407 int nCount = mpActualPos - rpStartPos;
409 // No keyword if string contains char > 255
410 bool bCanBeKeyword = true;
411 for( int i = 0 ; i < nCount ; i++ )
413 if( rpStartPos[i] > 255 )
415 bCanBeKeyword = false;
416 break;
420 if( bCanBeKeyword )
422 rtl::OUString aKWString(rpStartPos, nCount);
423 rtl::OString aByteStr = rtl::OUStringToOString(aKWString,
424 RTL_TEXTENCODING_ASCII_US).toAsciiLowerCase();
425 if ( bsearch( aByteStr.getStr(), ppListKeyWords, nKeyWordCount, sizeof( char* ),
426 compare_strings ) )
428 reType = TT_KEYWORDS;
430 if (aByteStr.equalsL(RTL_CONSTASCII_STRINGPARAM("rem")))
432 // Alle Zeichen bis Zeilen-Ende oder EOF entfernen
433 sal_Unicode cPeek = peekChar();
434 while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == sal_False )
436 c = getChar();
437 cPeek = peekChar();
440 reType = TT_COMMENT;
447 // Operator?
448 // only for BASIC '\'' should be a comment, otherwise it is a normal string and handled there
449 else if ( ( testCharFlags( c, CHAR_OPERATOR ) == sal_True ) || ( (c == '\'') && (aLanguage==HIGHLIGHT_BASIC)) )
451 // parameters for SQL view
452 if ( (c==':') || (c=='?'))
454 if (c!='?')
456 sal_Bool bIdentifierChar;
459 // Naechstes Zeichen holen
460 c = peekChar();
461 bIdentifierChar = isAlpha(c);
462 if( bIdentifierChar )
463 getChar();
465 while( bIdentifierChar );
467 reType = TT_PARAMETER;
469 else if (c=='-')
471 sal_Unicode cPeekNext = peekChar();
472 if (cPeekNext=='-')
474 // Alle Zeichen bis Zeilen-Ende oder EOF entfernen
475 while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == sal_False )
477 getChar();
478 cPeekNext = peekChar();
480 reType = TT_COMMENT;
483 else if (c=='/')
485 sal_Unicode cPeekNext = peekChar();
486 if (cPeekNext=='/')
488 // Alle Zeichen bis Zeilen-Ende oder EOF entfernen
489 while( cPeekNext != CHAR_EOF && testCharFlags( cPeekNext, CHAR_EOL ) == sal_False )
491 getChar();
492 cPeekNext = peekChar();
494 reType = TT_COMMENT;
497 else
499 // Kommentar ?
500 if ( c == '\'' )
502 c = getChar(); // '/' entfernen
504 // Alle Zeichen bis Zeilen-Ende oder EOF entfernen
505 sal_Unicode cPeek = c;
506 while( cPeek != CHAR_EOF && testCharFlags( cPeek, CHAR_EOL ) == sal_False )
508 getChar();
509 cPeek = peekChar();
512 reType = TT_COMMENT;
515 // Echter Operator, kann hier einfach behandelt werden,
516 // da nicht der wirkliche Operator, wie z.B. += interessiert,
517 // sondern nur die Tatsache, dass es sich um einen handelt.
518 if( reType != TT_COMMENT )
520 reType = TT_OPERATOR;
526 // Objekt-Trenner? Muss vor Number abgehandelt werden
527 else if( c == '.' && ( peekChar() < '0' || peekChar() > '9' ) )
529 reType = TT_OPERATOR;
532 // Zahl?
533 else if( testCharFlags( c, CHAR_START_NUMBER ) == sal_True )
535 reType = TT_NUMBER;
537 // Zahlensystem, 10 = normal, wird bei Oct/Hex geaendert
538 int nRadix = 10;
540 // Ist es eine Hex- oder Oct-Zahl?
541 if( c == '&' )
543 // Octal?
544 if( peekChar() == 'o' || peekChar() == 'O' )
546 // o entfernen
547 getChar();
548 nRadix = 8; // Octal-Basis
550 // Alle Ziffern einlesen
551 while( testCharFlags( peekChar(), CHAR_IN_OCT_NUMBER ) )
552 c = getChar();
554 // Hex?
555 else if( peekChar() == 'h' || peekChar() == 'H' )
557 // x entfernen
558 getChar();
559 nRadix = 16; // Hex-Basis
561 // Alle Ziffern einlesen und puffern
562 while( testCharFlags( peekChar(), CHAR_IN_HEX_NUMBER ) )
563 c = getChar();
565 else
567 reType = TT_OPERATOR;
571 // Wenn nicht Oct oder Hex als double ansehen
572 if( reType == TT_NUMBER && nRadix == 10 )
574 // Flag, ob das letzte Zeichen ein Exponent war
575 sal_Bool bAfterExpChar = sal_False;
577 // Alle Ziffern einlesen
578 while( testCharFlags( peekChar(), CHAR_IN_NUMBER ) ||
579 (bAfterExpChar && peekChar() == '+' ) ||
580 (bAfterExpChar && peekChar() == '-' ) )
581 // Nach Exponent auch +/- OK
583 c = getChar(); // Zeichen lesen
584 bAfterExpChar = ( c == 'e' || c == 'E' );
588 // reType = TT_NUMBER;
591 // String?
592 else if( testCharFlags( c, CHAR_START_STRING ) == sal_True )
594 // Merken, welches Zeichen den String eroeffnet hat
595 sal_Unicode cEndString = c;
596 if( c == '[' )
597 cEndString = ']';
599 // Alle Ziffern einlesen und puffern
600 while( peekChar() != cEndString )
602 // #58846 EOF vor getChar() abfangen, damit EOF micht verloren geht
603 if( peekChar() == CHAR_EOF )
605 // ERROR: unterminated string literal
606 reType = TT_ERROR;
607 break;
609 c = getChar();
610 if( testCharFlags( c, CHAR_EOL ) == sal_True )
612 // ERROR: unterminated string literal
613 reType = TT_ERROR;
614 break;
618 // Zeichen lesen
619 if( reType != TT_ERROR )
621 getChar();
622 if( cEndString == ']' )
623 reType = TT_IDENTIFIER;
624 else
625 reType = TT_STRING;
629 // Zeilenende?
630 else if( testCharFlags( c, CHAR_EOL ) == sal_True )
632 // Falls ein weiteres anderes EOL-Char folgt, weg damit
633 sal_Unicode cNext = peekChar();
634 if( cNext != c && testCharFlags( cNext, CHAR_EOL ) == sal_True )
635 getChar();
637 // Positions-Daten auf Zeilen-Beginn setzen
638 nCol = 0;
639 nLine++;
641 reType = TT_EOL;
644 // Alles andere bleibt TT_UNKNOWN
647 // End-Position eintragen
648 rpEndPos = mpActualPos;
649 return sal_True;
652 SimpleTokenizer_Impl::SimpleTokenizer_Impl( HighlighterLanguage aLang ): aLanguage(aLang)
654 memset( aCharTypeTab, 0, sizeof( aCharTypeTab ) );
656 // Zeichen-Tabelle fuellen
657 sal_uInt16 i;
659 // Zulaessige Zeichen fuer Identifier
660 sal_uInt16 nHelpMask = (sal_uInt16)( CHAR_START_IDENTIFIER | CHAR_IN_IDENTIFIER );
661 for( i = 'a' ; i <= 'z' ; i++ )
662 aCharTypeTab[i] |= nHelpMask;
663 for( i = 'A' ; i <= 'Z' ; i++ )
664 aCharTypeTab[i] |= nHelpMask;
665 // '_' extra eintragen
666 aCharTypeTab[(int)'_'] |= nHelpMask;
667 // AB 23.6.97: '$' ist auch erlaubt
668 aCharTypeTab[(int)'$'] |= nHelpMask;
670 // Ziffern (Identifier und Number ist moeglich)
671 nHelpMask = (sal_uInt16)( CHAR_IN_IDENTIFIER | CHAR_START_NUMBER |
672 CHAR_IN_NUMBER | CHAR_IN_HEX_NUMBER );
673 for( i = '0' ; i <= '9' ; i++ )
674 aCharTypeTab[i] |= nHelpMask;
676 // e und E sowie . von Hand ergaenzen
677 aCharTypeTab[(int)'e'] |= CHAR_IN_NUMBER;
678 aCharTypeTab[(int)'E'] |= CHAR_IN_NUMBER;
679 aCharTypeTab[(int)'.'] |= (sal_uInt16)( CHAR_IN_NUMBER | CHAR_START_NUMBER );
680 aCharTypeTab[(int)'&'] |= CHAR_START_NUMBER;
682 // Hex-Ziffern
683 for( i = 'a' ; i <= 'f' ; i++ )
684 aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER;
685 for( i = 'A' ; i <= 'F' ; i++ )
686 aCharTypeTab[i] |= CHAR_IN_HEX_NUMBER;
688 // Oct-Ziffern
689 for( i = '0' ; i <= '7' ; i++ )
690 aCharTypeTab[i] |= CHAR_IN_OCT_NUMBER;
692 // String-Beginn/End-Zeichen
693 aCharTypeTab[(int)'\''] |= CHAR_START_STRING;
694 aCharTypeTab[(int)'\"'] |= CHAR_START_STRING;
695 aCharTypeTab[(int)'['] |= CHAR_START_STRING;
696 aCharTypeTab[(int)'`'] |= CHAR_START_STRING;
698 // Operator-Zeichen
699 aCharTypeTab[(int)'!'] |= CHAR_OPERATOR;
700 aCharTypeTab[(int)'%'] |= CHAR_OPERATOR;
701 // aCharTypeTab[(int)'&'] |= CHAR_OPERATOR; Removed because of #i14140
702 aCharTypeTab[(int)'('] |= CHAR_OPERATOR;
703 aCharTypeTab[(int)')'] |= CHAR_OPERATOR;
704 aCharTypeTab[(int)'*'] |= CHAR_OPERATOR;
705 aCharTypeTab[(int)'+'] |= CHAR_OPERATOR;
706 aCharTypeTab[(int)','] |= CHAR_OPERATOR;
707 aCharTypeTab[(int)'-'] |= CHAR_OPERATOR;
708 aCharTypeTab[(int)'/'] |= CHAR_OPERATOR;
709 aCharTypeTab[(int)':'] |= CHAR_OPERATOR;
710 aCharTypeTab[(int)'<'] |= CHAR_OPERATOR;
711 aCharTypeTab[(int)'='] |= CHAR_OPERATOR;
712 aCharTypeTab[(int)'>'] |= CHAR_OPERATOR;
713 aCharTypeTab[(int)'?'] |= CHAR_OPERATOR;
714 aCharTypeTab[(int)'^'] |= CHAR_OPERATOR;
715 aCharTypeTab[(int)'|'] |= CHAR_OPERATOR;
716 aCharTypeTab[(int)'~'] |= CHAR_OPERATOR;
717 aCharTypeTab[(int)'{'] |= CHAR_OPERATOR;
718 aCharTypeTab[(int)'}'] |= CHAR_OPERATOR;
719 // aCharTypeTab[(int)'['] |= CHAR_OPERATOR; Removed because of #i17826
720 aCharTypeTab[(int)']'] |= CHAR_OPERATOR;
721 aCharTypeTab[(int)';'] |= CHAR_OPERATOR;
723 // Space
724 aCharTypeTab[(int)' ' ] |= CHAR_SPACE;
725 aCharTypeTab[(int)'\t'] |= CHAR_SPACE;
727 // Zeilen-Ende-Zeichen
728 aCharTypeTab[(int)'\r'] |= CHAR_EOL;
729 aCharTypeTab[(int)'\n'] |= CHAR_EOL;
731 ppListKeyWords = NULL;
734 SimpleTokenizer_Impl::~SimpleTokenizer_Impl( void )
738 SimpleTokenizer_Impl* getSimpleTokenizer( void )
740 static SimpleTokenizer_Impl* pSimpleTokenizer = NULL;
741 if( !pSimpleTokenizer )
742 pSimpleTokenizer = new SimpleTokenizer_Impl();
743 return pSimpleTokenizer;
746 // Heraussuchen der jeweils naechsten Funktion aus einem JavaScript-Modul
747 sal_uInt16 SimpleTokenizer_Impl::parseLine( sal_uInt32 nParseLine, const String* aSource )
749 // Position auf den Anfang des Source-Strings setzen
750 mpStringBegin = mpActualPos = aSource->GetBuffer();
752 // Zeile und Spalte initialisieren
753 nLine = nParseLine;
754 nCol = 0L;
756 // Variablen fuer die Out-Parameter
757 TokenTypes eType;
758 const sal_Unicode* pStartPos;
759 const sal_Unicode* pEndPos;
761 // Schleife ueber alle Tokens
762 sal_uInt16 nTokenCount = 0;
763 while( getNextToken( eType, pStartPos, pEndPos ) )
764 nTokenCount++;
766 return nTokenCount;
769 void SimpleTokenizer_Impl::getHighlightPortions( sal_uInt32 nParseLine, const String& rLine,
770 /*out*/HighlightPortions& portions )
772 // Position auf den Anfang des Source-Strings setzen
773 mpStringBegin = mpActualPos = rLine.GetBuffer();
775 // Zeile und Spalte initialisieren
776 nLine = nParseLine;
777 nCol = 0L;
779 // Variablen fuer die Out-Parameter
780 TokenTypes eType;
781 const sal_Unicode* pStartPos;
782 const sal_Unicode* pEndPos;
784 // Schleife ueber alle Tokens
785 while( getNextToken( eType, pStartPos, pEndPos ) )
787 HighlightPortion portion;
789 portion.nBegin = (sal_uInt16)(pStartPos - mpStringBegin);
790 portion.nEnd = (sal_uInt16)(pEndPos - mpStringBegin);
791 portion.tokenType = eType;
793 portions.push_back(portion);
798 //////////////////////////////////////////////////////////////////////////
799 // Implementierung des SyntaxHighlighter
801 SyntaxHighlighter::SyntaxHighlighter()
803 m_pSimpleTokenizer = 0;
804 m_pKeyWords = NULL;
805 m_nKeyWordCount = 0;
808 SyntaxHighlighter::~SyntaxHighlighter()
810 delete m_pSimpleTokenizer;
811 delete m_pKeyWords;
814 void SyntaxHighlighter::initialize( HighlighterLanguage eLanguage_ )
816 eLanguage = eLanguage_;
817 delete m_pSimpleTokenizer;
818 m_pSimpleTokenizer = new SimpleTokenizer_Impl(eLanguage);
820 switch (eLanguage)
822 case HIGHLIGHT_BASIC:
823 m_pSimpleTokenizer->setKeyWords( strListBasicKeyWords,
824 sizeof( strListBasicKeyWords ) / sizeof( char* ));
825 break;
826 case HIGHLIGHT_SQL:
827 m_pSimpleTokenizer->setKeyWords( strListSqlKeyWords,
828 sizeof( strListSqlKeyWords ) / sizeof( char* ));
829 break;
830 default:
831 m_pSimpleTokenizer->setKeyWords( NULL, 0 );
835 const Range SyntaxHighlighter::notifyChange( sal_uInt32 nLine, sal_Int32 nLineCountDifference,
836 const String* pChangedLines, sal_uInt32 nArrayLength)
838 (void)nLineCountDifference;
840 for( sal_uInt32 i=0 ; i < nArrayLength ; i++ )
841 m_pSimpleTokenizer->parseLine(nLine+i, &pChangedLines[i]);
843 return Range( nLine, nLine + nArrayLength-1 );
846 void SyntaxHighlighter::getHighlightPortions( sal_uInt32 nLine, const String& rLine,
847 /*out*/HighlightPortions& portions )
849 m_pSimpleTokenizer->getHighlightPortions( nLine, rLine, portions );
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */