1 // Scintilla source code edit control
4 ** Converted to lexer object by "Udo Lechner" <dlchnr(at)gmx(dot)net>
6 // Copyright 1998-2008 by Neil Hodgson <neilh@scintilla.org>
7 // Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my>
8 // The License.txt file describes the conditions under which this software may be distributed.
18 #include <string_view>
23 #include "Scintilla.h"
27 #include "LexAccessor.h"
28 #include "StyleContext.h"
29 #include "CharacterSet.h"
30 #include "LexerModule.h"
31 #include "OptionSet.h"
32 #include "DefaultLexer.h"
34 using namespace Scintilla
;
35 using namespace Lexilla
;
37 // Info for HERE document handling from perldata.pod (reformatted):
38 // ----------------------------------------------------------------
39 // A line-oriented form of quoting is based on the shell ``here-doc'' syntax.
40 // Following a << you specify a string to terminate the quoted material, and
41 // all lines following the current line down to the terminating string are
42 // the value of the item.
43 // Prefixing the terminating string with a "~" specifies that you want to
44 // use "Indented Here-docs" (see below).
45 // * The terminating string may be either an identifier (a word), or some quoted text.
47 // * If quoted, the type of quotes you use determines the treatment of the
48 // text, just as in regular quoting.
49 // * An unquoted identifier works like double quotes.
50 // * There must be no space between the << and the identifier.
51 // (If you put a space it will be treated as a null identifier,
52 // which is valid, and matches the first empty line.)
53 // (This is deprecated, -w warns of this syntax)
54 // * The terminating string must appear by itself (unquoted and
55 // with no surrounding whitespace) on the terminating line.
59 // The here-doc modifier "~" allows you to indent your here-docs to
60 // make the code more readable.
61 // The delimiter is used to determine the exact whitespace to remove
62 // from the beginning of each line. All lines must have at least the
63 // same starting whitespace (except lines only containing a newline)
64 // or perl will croak. Tabs and spaces can be mixed, but are matched
65 // exactly. One tab will not be equal to 8 spaces!
66 // Additional beginning whitespace (beyond what preceded the
67 // delimiter) will be preserved.
69 #define HERE_DELIM_MAX 256 // maximum length of HERE doc delimiter
71 #define PERLNUM_BINARY 1 // order is significant: 1-3 cannot have a dot
72 #define PERLNUM_OCTAL 2
73 #define PERLNUM_FLOAT_EXP 3 // exponent part only
74 #define PERLNUM_HEX 4 // may be a hex float
75 #define PERLNUM_DECIMAL 5 // 1-5 are numbers; 6-7 are strings
76 #define PERLNUM_VECTOR 6
77 #define PERLNUM_V_VECTOR 7
80 #define BACK_NONE 0 // lookback state for bareword disambiguation:
81 #define BACK_OPERATOR 1 // whitespace/comments are insignificant
82 #define BACK_KEYWORD 2 // operators/keywords are needed for disambiguation
84 #define SUB_BEGIN 0 // states for subroutine prototype scan:
85 #define SUB_HAS_PROTO 1 // only 'prototype' attribute allows prototypes
86 #define SUB_HAS_ATTRIB 2 // other attributes can exist leftward
87 #define SUB_HAS_MODULE 3 // sub name can have a ::identifier part
88 #define SUB_HAS_SUB 4 // 'sub' keyword
90 // all interpolated styles are different from their parent styles by a constant difference
91 // we also assume SCE_PL_STRING_VAR is the interpolated style with the smallest value
92 #define INTERPOLATE_SHIFT (SCE_PL_STRING_VAR - SCE_PL_STRING)
// Tests whether the document text in [start, end) is a member of `keywords`.
// The characters are read one by one from `styler` into the buffer `s`
// (at most the first 30 characters are considered) and the buffer is then
// looked up with WordList::InList().
94 static bool isPerlKeyword(Sci_PositionU start
, Sci_PositionU end
, WordList
&keywords
, LexAccessor
&styler
) {
95 // old-style keyword matcher; needed because GetCurrent() needs
96 // current segment to be committed, but we may abandon early...
98 Sci_PositionU i
, len
= end
- start
;
99 if (len
> 30) { len
= 30; }
100 for (i
= 0; i
< len
; i
++, start
++) s
[i
] = styler
[start
];
102 return keywords
.InList(s
);
// Decides whether an identifier should be treated as a bareword.
//   bk       - position of the last significant character before the word
//   fw       - position just after the word, scanned forward past tabs/spaces
//   backFlag - BACK_NONE / BACK_OPERATOR / BACK_KEYWORD for the last
//              significant lexed item; BACK_NONE skips the test
//   backPos  - position of that item, compared against the start of bk's line
// The character before the word ('{', '&', "->", "::", "sub") and the text
// after it ('}' or "=>") drive the decision.
105 static int disambiguateBareword(LexAccessor
&styler
, Sci_PositionU bk
, Sci_PositionU fw
,
106 int backFlag
, Sci_PositionU backPos
, Sci_PositionU endPos
) {
107 // identifiers are recognized by Perl as barewords under some
108 // conditions, the following attempts to do the disambiguation
109 // by looking backward and forward; result in 2 LSB
111 bool moreback
= false; // true if passed newline/comments
112 bool brace
= false; // true if opening brace found
113 // if BACK_NONE, neither operator nor keyword, so skip test
114 if (backFlag
== BACK_NONE
)
116 // first look backwards past whitespace/comments to set EOL flag
117 // (some disambiguation patterns must be on a single line)
118 if (backPos
<= static_cast<Sci_PositionU
>(styler
.LineStart(styler
.GetLine(bk
))))
120 // look backwards at last significant lexed item for disambiguation
122 int ch
= static_cast<unsigned char>(styler
.SafeGetCharAt(bk
));
123 if (ch
== '{' && !moreback
) {
124 // {bareword: possible variable spec
126 } else if ((ch
== '&' && styler
.SafeGetCharAt(bk
- 1) != '&')
127 // &bareword: subroutine call
128 || styler
.Match(bk
- 1, "->")
129 // ->bareword: part of variable spec
130 || styler
.Match(bk
- 1, "::")
131 // ::bareword: part of module spec
132 || styler
.Match(bk
- 2, "sub")) {
133 // sub bareword: subroutine declaration
134 // (implied BACK_KEYWORD, no keywords end in 'sub'!)
137 // next, scan forward after word past tab/spaces only;
138 // if ch isn't one of '[{(,' we can skip the test
139 if ((ch
== '{' || ch
== '(' || ch
== '['|| ch
== ',')
141 while (IsASpaceOrTab(ch
= static_cast<unsigned char>(styler
.SafeGetCharAt(fw
)))
145 if ((ch
== '}' && brace
)
146 // {bareword}: variable spec
147 || styler
.Match(fw
, "=>")) {
148 // [{(, bareword=>: hash literal
// Backtracking helper: inspects the style at position `p` and treats
// SCE_PL_DEFAULT (whitespace) and SCE_PL_COMMENTLINE (comments) as skippable.
// `p` is an in/out position passed by reference.
155 static void skipWhitespaceComment(LexAccessor
&styler
, Sci_PositionU
&p
) {
156 // when backtracking, we need to skip whitespace and comments
158 const int style
= styler
.StyleAt(p
);
159 if (style
!= SCE_PL_DEFAULT
&& style
!= SCE_PL_COMMENTLINE
)
// Locates the lexeme that precedes position `bk`.
// Whitespace and comments are skipped first; the style found there is stored
// in `style`, and `bk` is then examined backward for as long as the preceding
// position carries that same style. Both `bk` and `style` are passed by
// reference. Callers use the int result as the lexeme's length (0 meaning
// that no lexeme was found).
165 static int findPrevLexeme(LexAccessor
&styler
, Sci_PositionU
&bk
, int &style
) {
166 // scan backward past whitespace and comments to find a lexeme
167 skipWhitespaceComment(styler
, bk
);
171 style
= styler
.StyleAt(bk
);
172 while (bk
> 0) { // find extent of lexeme
173 if (styler
.StyleAt(bk
- 1) == style
) {
// Given the position `bk` of a '}', walks backward over operator-styled
// characters while tracking brace nesting in braceCount, looking for the
// matching '{'. A ';' is handled as an early-out case.
// If the balanced '{' is found with bk > 0, whitespace/comments before it are
// skipped and the style at the resulting position is returned; in every other
// case the result is SCE_PL_DEFAULT.
181 static int styleBeforeBracePair(LexAccessor
&styler
, Sci_PositionU bk
) {
182 // backtrack to find open '{' corresponding to a '}', balanced
183 // return significant style to be tested for '/' disambiguation
186 return SCE_PL_DEFAULT
;
188 if (styler
.StyleAt(bk
) == SCE_PL_OPERATOR
) {
189 int bkch
= static_cast<unsigned char>(styler
.SafeGetCharAt(bk
));
190 if (bkch
== ';') { // early out
192 } else if (bkch
== '}') {
194 } else if (bkch
== '{') {
195 if (--braceCount
== 0) break;
199 if (bk
> 0 && braceCount
== 0) {
200 // balanced { found, bk > 0, skip more whitespace/comments
202 skipWhitespaceComment(styler
, bk
);
203 return styler
.StyleAt(bk
);
205 return SCE_PL_DEFAULT
;
// Classifies the identifier that ends at position `bk` into a small integer
// sub-style code.
// A '>' at bk marks an input symbol such as <foo>. Otherwise the scan moves
// back over the identifier, ignores whitespace and comments, and checks
// whether an operator immediately before it is "->" or "::".
// The value 3 denotes a bare identifier.
208 static int styleCheckIdentifier(LexAccessor
&styler
, Sci_PositionU bk
) {
209 // backtrack to classify sub-styles of identifier under test
210 // return sub-style to be tested for '/' disambiguation
211 if (styler
.SafeGetCharAt(bk
) == '>') // inputsymbol, like <foo>
213 // backtrack to check for possible "->" or "::" before identifier
214 while (bk
> 0 && styler
.StyleAt(bk
) == SCE_PL_IDENTIFIER
) {
218 int bkstyle
= styler
.StyleAt(bk
);
219 if (bkstyle
== SCE_PL_DEFAULT
220 || bkstyle
== SCE_PL_COMMENTLINE
) {
221 // skip whitespace, comments
222 } else if (bkstyle
== SCE_PL_OPERATOR
) {
223 // test for "->" and "::"
224 if (styler
.Match(bk
- 1, "->") || styler
.Match(bk
- 1, "::"))
227 return 3; // bare identifier
// Scans one line forward from `pos` (never beyond `endPos`) and classifies it
// for POD styling using the states SCE_PL_DEFAULT and SCE_PL_POD_VERB, with
// any other state standing for a regular POD line.
// `pos` is passed by reference and is advanced by the scan; a CR immediately
// followed by LF is stepped over as a single line ending.
233 static int podLineScan(LexAccessor
&styler
, Sci_PositionU
&pos
, Sci_PositionU endPos
) {
234 // forward scan the current line to classify line for POD style
236 while (pos
< endPos
) {
237 int ch
= static_cast<unsigned char>(styler
.SafeGetCharAt(pos
));
238 if (ch
== '\n' || ch
== '\r') {
239 if (ch
== '\r' && styler
.SafeGetCharAt(pos
+ 1) == '\n') pos
++;
242 if (IsASpaceOrTab(ch
)) { // whitespace, take note
244 state
= SCE_PL_DEFAULT
;
245 } else if (state
== SCE_PL_DEFAULT
) { // verbatim POD line
246 state
= SCE_PL_POD_VERB
;
247 } else if (state
!= SCE_PL_POD_VERB
) { // regular POD line
253 state
= SCE_PL_DEFAULT
;
// Returns true when the text before `bk` forms a legal subroutine header, so
// that what follows can be treated as a prototype.
// Lexemes are fetched backward in pairs with findPrevLexeme() (lexeme 1
// followed by lexeme 2) and fed to a state machine over the SUB_* states:
//   ':'  + "prototype"   SUB_BEGIN -> SUB_HAS_PROTO
//   ':'  + <attribute>   SUB_HAS_PROTO / SUB_HAS_ATTRIB -> SUB_HAS_ATTRIB
//   '::' + <identifier>  -> SUB_HAS_MODULE
//   "sub" + <identifier> handled as the final pair
// After each pair `bk` is moved to pos1 to fetch the next pair. The loop runs
// until the state is SUB_HAS_SUB, and the result is whether that state was
// reached.
257 static bool styleCheckSubPrototype(LexAccessor
&styler
, Sci_PositionU bk
) {
258 // backtrack to identify if we're starting a subroutine prototype
259 // we also need to ignore whitespace/comments, format is like:
260 // sub abc::pqr :const :prototype(...)
261 // lexemes are tested in pairs, e.g. '::'+'pqr', ':'+'const', etc.
262 // and a state machine generates legal subroutine syntax matches
264 int state
= SUB_BEGIN
;
266 // find two lexemes, lexeme 2 follows lexeme 1
267 int style2
= SCE_PL_DEFAULT
;
268 Sci_PositionU pos2
= bk
;
269 int len2
= findPrevLexeme(styler
, pos2
, style2
);
270 int style1
= SCE_PL_DEFAULT
;
271 Sci_PositionU pos1
= pos2
;
272 if (pos1
> 0) pos1
--;
273 int len1
= findPrevLexeme(styler
, pos1
, style1
);
274 if (len1
== 0 || len2
== 0) // lexeme pair must exist
277 // match parts of syntax, if invalid subroutine syntax, break off
278 if (style1
== SCE_PL_OPERATOR
&& len1
== 1 &&
279 styler
.SafeGetCharAt(pos1
) == ':') { // ':'
280 if (style2
== SCE_PL_IDENTIFIER
|| style2
== SCE_PL_WORD
) {
281 if (len2
== 9 && styler
.Match(pos2
, "prototype")) { // ':' 'prototype'
282 if (state
== SUB_BEGIN
) {
283 state
= SUB_HAS_PROTO
;
286 } else { // ':' <attribute>
287 if (state
== SUB_HAS_PROTO
|| state
== SUB_HAS_ATTRIB
) {
288 state
= SUB_HAS_ATTRIB
;
294 } else if (style1
== SCE_PL_OPERATOR
&& len1
== 2 &&
295 styler
.Match(pos1
, "::")) { // '::'
296 if (style2
== SCE_PL_IDENTIFIER
) { // '::' <identifier>
297 state
= SUB_HAS_MODULE
;
300 } else if (style1
== SCE_PL_WORD
&& len1
== 3 &&
301 styler
.Match(pos1
, "sub")) { // 'sub'
302 if (style2
== SCE_PL_IDENTIFIER
) { // 'sub' <identifier>
308 bk
= pos1
; // set position for finding next lexeme pair
310 } while (state
!= SUB_HAS_SUB
);
311 return (state
== SUB_HAS_SUB
);
// Maps the internal number-lexing state (a PERLNUM_* value) to the style used
// for the completed token: vector literals (PERLNUM_VECTOR, PERLNUM_V_VECTOR)
// are styled as SCE_PL_STRING, PERLNUM_BAD is handled separately, and the
// remaining states are styled as SCE_PL_NUMBER.
314 static int actualNumStyle(int numberStyle
) {
315 if (numberStyle
== PERLNUM_VECTOR
|| numberStyle
== PERLNUM_V_VECTOR
) {
316 return SCE_PL_STRING
;
317 } else if (numberStyle
== PERLNUM_BAD
) {
320 return SCE_PL_NUMBER
;
// Returns the closing delimiter that pairs with an opening one:
// '(' -> ')', '[' -> ']', '{' -> '}', '<' -> '>'.
323 static int opposite(int ch
) {
324 if (ch
== '(') return ')';
325 if (ch
== '[') return ']';
326 if (ch
== '{') return '}';
327 if (ch
== '<') return '>';
// Examines the characters of `line`, from its first position up to (but not
// including) the position just before the start of the next line.
// Each character is checked for being a '#' styled as SCE_PL_COMMENTLINE;
// any other character is tested with IsASpaceOrTab().
331 static bool IsCommentLine(Sci_Position line
, LexAccessor
&styler
) {
332 Sci_Position pos
= styler
.LineStart(line
);
333 Sci_Position eol_pos
= styler
.LineStart(line
+ 1) - 1;
334 for (Sci_Position i
= pos
; i
< eol_pos
; i
++) {
336 int style
= styler
.StyleAt(i
);
337 if (ch
== '#' && style
== SCE_PL_COMMENTLINE
)
339 else if (!IsASpaceOrTab(ch
))
// Checks whether `line` starts with the keyword "package": the first position
// of the line must be styled SCE_PL_WORD and the text there must match
// "package".
345 static bool IsPackageLine(Sci_Position line
, LexAccessor
&styler
) {
346 Sci_Position pos
= styler
.LineStart(line
);
347 int style
= styler
.StyleAt(pos
);
348 if (style
== SCE_PL_WORD
&& styler
.Match(pos
, "package")) {
// Reads the character five positions after `pos` and tests whether it is a
// digit in the range '1'..'4'.
// NOTE(review): offset 5 presumably skips a "=head" prefix - confirm against
// the callers.
354 static int PodHeadingLevel(Sci_Position pos
, LexAccessor
&styler
) {
355 int lvl
= static_cast<unsigned char>(styler
.SafeGetCharAt(pos
+ 5));
356 if (lvl
>= '1' && lvl
<= '4') {
362 // An individual named option for use in an OptionSet
364 // Options used for LexerPerl
// Each member is bound to the property named in its trailing comment.
369 // Custom folding of POD and packages
370 bool foldPOD
; // fold.perl.pod
371 // Enable folding Pod blocks when using the Perl lexer.
372 bool foldPackage
; // fold.perl.package
373 // Enable folding packages when using the Perl lexer.
// fold.perl.comment.explicit
375 bool foldCommentExplicit
;
// explicit folding is on by default
385 foldCommentExplicit
= true;
// Word list descriptions for the Perl lexer; handed to DefineWordListSets()
// by OptionSetPerl.
390 static const char *const perlWordListDesc
[] = {
// Option set binding property names to OptionsPerl members:
//   fold, fold.comment, fold.compact, fold.perl.pod, fold.perl.package,
//   fold.perl.comment.explicit, fold.perl.at.else
// and registering the word list descriptions in perlWordListDesc.
395 struct OptionSetPerl
: public OptionSet
<OptionsPerl
> {
397 DefineProperty("fold", &OptionsPerl::fold
);
399 DefineProperty("fold.comment", &OptionsPerl::foldComment
);
401 DefineProperty("fold.compact", &OptionsPerl::foldCompact
);
403 DefineProperty("fold.perl.pod", &OptionsPerl::foldPOD
,
404 "Set to 0 to disable folding Pod blocks when using the Perl lexer.");
406 DefineProperty("fold.perl.package", &OptionsPerl::foldPackage
,
407 "Set to 0 to disable folding packages when using the Perl lexer.");
409 DefineProperty("fold.perl.comment.explicit", &OptionsPerl::foldCommentExplicit
,
410 "Set to 0 to disable explicit folding.");
412 DefineProperty("fold.perl.at.else", &OptionsPerl::foldAtElse
,
413 "This option enables Perl folding on a \"} else {\" line of an if statement.");
415 DefineWordListSets(perlWordListDesc
);
// Lexer object for Perl, derived from DefaultLexer and constructed with the
// language name "perl" and the id SCLEX_PERL.
// Character sets held as members:
//   setWordStart  - letters and '_' (built with base 0x80 and the flag true)
//   setWord       - letters, digits and '_' (same extra arguments)
//   setSpecialVar - punctuation characters accepted after '$' for special
//                   variables
//   setControlVar - letters accepted after "$^" for control-char variables
// Property and word-list description queries are forwarded to osPerl.
// PropertySet, WordListSet, Lex and Fold are only declared here.
// LexerFactoryPerl() returns a newly allocated LexerPerl.
419 class LexerPerl
: public DefaultLexer
{
420 CharacterSet setWordStart
;
421 CharacterSet setWord
;
422 CharacterSet setSpecialVar
;
423 CharacterSet setControlVar
;
426 OptionSetPerl osPerl
;
429 DefaultLexer("perl", SCLEX_PERL
),
430 setWordStart(CharacterSet::setAlpha
, "_", 0x80, true),
431 setWord(CharacterSet::setAlphaNum
, "_", 0x80, true),
432 setSpecialVar(CharacterSet::setNone
, "\"$;<>&`'+,./\\%:=~!?@[]"),
433 setControlVar(CharacterSet::setNone
, "ACDEFHILMNOPRSTVWX") {
435 virtual ~LexerPerl() {
437 void SCI_METHOD
Release() override
{
440 int SCI_METHOD
Version() const override
{
443 const char *SCI_METHOD
PropertyNames() override
{
444 return osPerl
.PropertyNames();
446 int SCI_METHOD
PropertyType(const char *name
) override
{
447 return osPerl
.PropertyType(name
);
449 const char *SCI_METHOD
DescribeProperty(const char *name
) override
{
450 return osPerl
.DescribeProperty(name
);
452 Sci_Position SCI_METHOD
PropertySet(const char *key
, const char *val
) override
;
453 const char * SCI_METHOD
PropertyGet(const char *key
) override
{
454 return osPerl
.PropertyGet(key
);
456 const char *SCI_METHOD
DescribeWordListSets() override
{
457 return osPerl
.DescribeWordListSets();
459 Sci_Position SCI_METHOD
WordListSet(int n
, const char *wl
) override
;
460 void SCI_METHOD
Lex(Sci_PositionU startPos
, Sci_Position length
, int initStyle
, IDocument
*pAccess
) override
;
461 void SCI_METHOD
Fold(Sci_PositionU startPos
, Sci_Position length
, int initStyle
, IDocument
*pAccess
) override
;
463 void *SCI_METHOD
PrivateCall(int, void *) override
{
467 static ILexer5
*LexerFactoryPerl() {
468 return new LexerPerl();
470 int InputSymbolScan(StyleContext
&sc
);
471 void InterpolateSegment(StyleContext
&sc
, int maxSeg
, bool isPattern
=false);
// Sets the property `key` to `val` by delegating to osPerl.PropertySet(),
// which operates on the lexer's `options` member.
474 Sci_Position SCI_METHOD
LexerPerl::PropertySet(const char *key
, const char *val
) {
475 if (osPerl
.PropertySet(&options
, key
, val
)) {
// Replaces word list number `n` with the words given in `wl`.
// The target list is chosen through wordListN (the `keywords` list is the
// one selected here). Returns firstModification: it starts at -1 and is set
// to 0 when the existing list differs from the new one.
481 Sci_Position SCI_METHOD
LexerPerl::WordListSet(int n
, const char *wl
) {
482 WordList
*wordListN
= 0;
485 wordListN
= &keywords
;
488 Sci_Position firstModification
= -1;
492 if (*wordListN
!= wlNew
) {
494 firstModification
= 0;
497 return firstModification
;
// Looks ahead from the current position of `sc`, one character at a time,
// for a closing '>' on the same line (as in a <FILEHANDLE> input symbol).
// The scan stops at CR, LF or a zero character; when a '>' is found, the
// "<=>" operator is checked as a special case.
500 int LexerPerl::InputSymbolScan(StyleContext
&sc
) {
501 // forward scan for matching > on same line; file handles
503 while ((c
= sc
.GetRelativeCharacter(++sLen
)) != 0) {
504 if (c
== '\r' || c
== '\n') {
506 } else if (c
== '>') {
507 if (sc
.Match("<=>")) // '<=>' case
// Styles one segment of an interpolating string or pattern.
//   sc        - style context positioned at the start of the segment
//   maxSeg    - number of characters remaining in the segment
//   isPattern - true when the segment belongs to a pattern; this disables the
//               "$(", "$)", "$|" and "@+", "@-" special forms
// Variable patterns starting with '$' or '@' are recognised: "$#", chains of
// '$' dereferences, word or {word} names, ${digit}, $digits, special
// punctuation variables, and $^X control variables.
// A recognised variable switches sc.state to the interpolated variant
// (state + INTERPOLATE_SHIFT); otherwise the state is switched back to the
// base style (state - INTERPOLATE_SHIFT) if it was an interpolated one.
515 void LexerPerl::InterpolateSegment(StyleContext
&sc
, int maxSeg
, bool isPattern
) {
516 // interpolate a segment (with no active backslashes or delimiters within)
517 // switch in or out of an interpolation style or continue current style
518 // commit variable patterns if found, trim segment, repeat until done
522 if ((maxSeg
> 1) && (sc
.ch
== '$' || sc
.ch
== '@')) {
523 // $#[$]*word [$@][$]*word (where word or {word} is always present)
526 if (sc
.ch
== '$' && sc
.chNext
== '#') { // starts with $#
529 while ((maxSeg
> sLen
) && (sc
.GetRelativeCharacter(sLen
) == '$')) // >0 $ dereference within
531 if ((maxSeg
> sLen
) && (sc
.GetRelativeCharacter(sLen
) == '{')) { // { start for {word}
536 int c
= sc
.GetRelativeCharacter(sLen
);
537 if (setWordStart
.Contains(c
)) { // word (various)
540 while (maxSeg
> sLen
) {
541 if (!setWord
.Contains(sc
.GetRelativeCharacter(sLen
)))
545 } else if (braces
&& IsADigit(c
) && (sLen
== 2)) { // digit for ${digit}
551 if ((maxSeg
> sLen
) && (sc
.GetRelativeCharacter(sLen
) == '}')) { // } end for {word}
557 if (!isVar
&& (maxSeg
> 1)) { // $- or @-specific variable patterns
561 if (IsADigit(c
)) { // $[0-9] and slurp trailing digits
564 while ((maxSeg
> sLen
) && IsADigit(sc
.GetRelativeCharacter(sLen
)))
566 } else if (setSpecialVar
.Contains(c
)) { // $ special variables
569 } else if (!isPattern
&& ((c
== '(') || (c
== ')') || (c
== '|'))) { // $ additional
572 } else if (c
== '^') { // $^A control-char style
574 if ((maxSeg
> sLen
) && setControlVar
.Contains(sc
.GetRelativeCharacter(sLen
))) {
579 } else if (sc
.ch
== '@') {
581 if (!isPattern
&& ((c
== '+') || (c
== '-'))) { // @ specials non-pattern
587 if (isVar
) { // commit as interpolated variable or normal character
588 if (sc
.state
< SCE_PL_STRING_VAR
)
589 sc
.SetState(sc
.state
+ INTERPOLATE_SHIFT
);
593 if (sc
.state
>= SCE_PL_STRING_VAR
)
594 sc
.SetState(sc
.state
- INTERPOLATE_SHIFT
);
599 if (sc
.state
>= SCE_PL_STRING_VAR
)
600 sc
.SetState(sc
.state
- INTERPOLATE_SHIFT
);
603 void SCI_METHOD
LexerPerl::Lex(Sci_PositionU startPos
, Sci_Position length
, int initStyle
, IDocument
*pAccess
) {
604 LexAccessor
styler(pAccess
);
606 // keywords that force /PATTERN/ at all times; should track vim's behaviour
608 reWords
.Set("elsif if split while");
611 CharacterSet
setSingleCharOp(CharacterSet::setNone
, "rwxoRWXOezsfdlpSbctugkTBMAC");
612 // lexing of "%*</" operators is non-trivial; these are missing in the set below
613 CharacterSet
setPerlOperator(CharacterSet::setNone
, "^&\\()-+=|{}[]:;>,?!.~");
614 CharacterSet
setQDelim(CharacterSet::setNone
, "qrwx");
615 CharacterSet
setModifiers(CharacterSet::setAlpha
);
616 CharacterSet
setPreferRE(CharacterSet::setNone
, "*/<%");
617 // setArray and setHash also accept chars for special vars like $_,
618 // which are then truncated when the next char does not match setVar
619 CharacterSet
setVar(CharacterSet::setAlphaNum
, "#$_'", 0x80, true);
620 CharacterSet
setArray(CharacterSet::setAlpha
, "#$_+-", 0x80, true);
621 CharacterSet
setHash(CharacterSet::setAlpha
, "#$_!^+-", 0x80, true);
622 CharacterSet
&setPOD
= setModifiers
;
623 CharacterSet
setNonHereDoc(CharacterSet::setDigits
, "=$@");
624 CharacterSet
setHereDocDelim(CharacterSet::setAlphaNum
, "_");
625 CharacterSet
setSubPrototype(CharacterSet::setNone
, "\\[$@%&*+];_ \t");
626 CharacterSet
setRepetition(CharacterSet::setDigits
, ")\"'");
627 // for format identifiers
628 CharacterSet
setFormatStart(CharacterSet::setAlpha
, "_=");
629 CharacterSet
&setFormat
= setHereDocDelim
;
631 // Lexer for perl often has to backtrack to start of current style to determine
632 // which characters are being used as quotes, how deeply nested is the
633 // start position and what the termination string is for HERE documents.
635 class HereDocCls
{ // Class to manage HERE doc sequence
638 // 0: '<<' encountered
639 // 1: collect the delimiter
640 // 2: here doc text (lines after the delimiter)
641 int Quote
; // the char after '<<'
642 bool Quoted
; // true if Quote in ('\'','"','`')
643 bool StripIndent
; // true if '<<~' requested to strip leading whitespace
644 int DelimiterLength
; // strlen(Delimiter)
645 char Delimiter
[HERE_DELIM_MAX
]; // the Delimiter
654 void Append(int ch
) {
655 Delimiter
[DelimiterLength
++] = static_cast<char>(ch
);
656 Delimiter
[DelimiterLength
] = '\0';
661 HereDocCls HereDoc
; // TODO: FIFO for stacked here-docs
663 class QuoteCls
{ // Class to manage quote pairs
671 void New(int r
= 1) {
685 // additional state for number lexing
686 int numState
= PERLNUM_DECIMAL
;
689 Sci_PositionU endPos
= startPos
+ length
;
691 // Backtrack to beginning of style if required...
692 // If in a long distance lexical state, backtrack to find quote characters.
693 // Includes strings (may be multi-line), numbers (additional state), format
694 // bodies, as well as POD sections.
695 if (initStyle
== SCE_PL_HERE_Q
696 || initStyle
== SCE_PL_HERE_QQ
697 || initStyle
== SCE_PL_HERE_QX
698 || initStyle
== SCE_PL_FORMAT
699 || initStyle
== SCE_PL_HERE_QQ_VAR
700 || initStyle
== SCE_PL_HERE_QX_VAR
702 // backtrack through multiple styles to reach the delimiter start
703 int delim
= (initStyle
== SCE_PL_FORMAT
) ? SCE_PL_FORMAT_IDENT
:SCE_PL_HERE_DELIM
;
704 while ((startPos
> 1) && (styler
.StyleAt(startPos
) != delim
)) {
707 startPos
= styler
.LineStart(styler
.GetLine(startPos
));
708 initStyle
= styler
.StyleAt(startPos
- 1);
710 if (initStyle
== SCE_PL_STRING
711 || initStyle
== SCE_PL_STRING_QQ
712 || initStyle
== SCE_PL_BACKTICKS
713 || initStyle
== SCE_PL_STRING_QX
714 || initStyle
== SCE_PL_REGEX
715 || initStyle
== SCE_PL_STRING_QR
716 || initStyle
== SCE_PL_REGSUBST
717 || initStyle
== SCE_PL_STRING_VAR
718 || initStyle
== SCE_PL_STRING_QQ_VAR
719 || initStyle
== SCE_PL_BACKTICKS_VAR
720 || initStyle
== SCE_PL_STRING_QX_VAR
721 || initStyle
== SCE_PL_REGEX_VAR
722 || initStyle
== SCE_PL_STRING_QR_VAR
723 || initStyle
== SCE_PL_REGSUBST_VAR
725 // for interpolation, must backtrack through a mix of two different styles
726 int otherStyle
= (initStyle
>= SCE_PL_STRING_VAR
) ?
727 initStyle
- INTERPOLATE_SHIFT
: initStyle
+ INTERPOLATE_SHIFT
;
728 while (startPos
> 1) {
729 int st
= styler
.StyleAt(startPos
- 1);
730 if ((st
!= initStyle
) && (st
!= otherStyle
))
734 initStyle
= SCE_PL_DEFAULT
;
735 } else if (initStyle
== SCE_PL_STRING_Q
736 || initStyle
== SCE_PL_STRING_QW
737 || initStyle
== SCE_PL_XLAT
738 || initStyle
== SCE_PL_CHARACTER
739 || initStyle
== SCE_PL_NUMBER
740 || initStyle
== SCE_PL_IDENTIFIER
741 || initStyle
== SCE_PL_ERROR
742 || initStyle
== SCE_PL_SUB_PROTOTYPE
744 while ((startPos
> 1) && (styler
.StyleAt(startPos
- 1) == initStyle
)) {
747 initStyle
= SCE_PL_DEFAULT
;
748 } else if (initStyle
== SCE_PL_POD
749 || initStyle
== SCE_PL_POD_VERB
751 // POD backtracking finds preceding blank lines and goes back past them
752 Sci_Position ln
= styler
.GetLine(startPos
);
754 initStyle
= styler
.StyleAt(styler
.LineStart(--ln
));
755 if (initStyle
== SCE_PL_POD
|| initStyle
== SCE_PL_POD_VERB
) {
756 while (ln
> 0 && styler
.GetLineState(ln
) == SCE_PL_DEFAULT
)
759 startPos
= styler
.LineStart(++ln
);
760 initStyle
= styler
.StyleAt(startPos
- 1);
763 initStyle
= SCE_PL_DEFAULT
;
767 // backFlag, backPos are additional state to aid identifier corner cases.
768 // Look backwards past whitespace and comments in order to detect either
769 // operator or keyword. Later updated as we go along.
770 int backFlag
= BACK_NONE
;
771 Sci_PositionU backPos
= startPos
;
774 skipWhitespaceComment(styler
, backPos
);
775 if (styler
.StyleAt(backPos
) == SCE_PL_OPERATOR
)
776 backFlag
= BACK_OPERATOR
;
777 else if (styler
.StyleAt(backPos
) == SCE_PL_WORD
)
778 backFlag
= BACK_KEYWORD
;
782 StyleContext
sc(startPos
, endPos
- startPos
, initStyle
, styler
);
784 for (; sc
.More(); sc
.Forward()) {
786 // Determine if the current state should terminate.
788 case SCE_PL_OPERATOR
:
789 sc
.SetState(SCE_PL_DEFAULT
);
790 backFlag
= BACK_OPERATOR
;
791 backPos
= sc
.currentPos
;
793 case SCE_PL_IDENTIFIER
: // identifier, bareword, inputsymbol
794 if ((!setWord
.Contains(sc
.ch
) && sc
.ch
!= '\'')
795 || sc
.Match('.', '.')
796 || sc
.chPrev
== '>') { // end of inputsymbol
797 sc
.SetState(SCE_PL_DEFAULT
);
800 case SCE_PL_WORD
: // keyword, plus special cases
801 if (!setWord
.Contains(sc
.ch
)) {
803 sc
.GetCurrent(s
, sizeof(s
));
804 if ((strcmp(s
, "__DATA__") == 0) || (strcmp(s
, "__END__") == 0)) {
805 sc
.ChangeState(SCE_PL_DATASECTION
);
807 if ((strcmp(s
, "format") == 0)) {
808 sc
.SetState(SCE_PL_FORMAT_IDENT
);
811 sc
.SetState(SCE_PL_DEFAULT
);
813 backFlag
= BACK_KEYWORD
;
814 backPos
= sc
.currentPos
;
821 case SCE_PL_SYMBOLTABLE
:
822 if (sc
.Match(':', ':')) { // skip ::
824 } else if (!setVar
.Contains(sc
.ch
)) {
825 if (sc
.LengthCurrent() == 1) {
826 // Special variable: $(, $_ etc.
829 sc
.SetState(SCE_PL_DEFAULT
);
833 // if no early break, number style is terminated at "(go through)"
835 if (sc
.chNext
== '.') {
836 // double dot is always an operator (go through)
837 } else if (numState
<= PERLNUM_FLOAT_EXP
) {
838 // non-decimal number or float exponent, consume next dot
839 sc
.SetState(SCE_PL_OPERATOR
);
841 } else { // decimal or vectors allows dots
843 if (numState
== PERLNUM_DECIMAL
) {
844 if (dotCount
<= 1) // number with one dot in it
846 if (IsADigit(sc
.chNext
)) { // really a vector
847 numState
= PERLNUM_VECTOR
;
850 // number then dot (go through)
851 } else if (numState
== PERLNUM_HEX
) {
852 if (dotCount
<= 1 && IsADigit(sc
.chNext
, 16)) {
853 break; // hex with one dot is a hex float
855 sc
.SetState(SCE_PL_OPERATOR
);
858 // hex then dot (go through)
859 } else if (IsADigit(sc
.chNext
)) // vectors
861 // vector then dot (go through)
863 } else if (sc
.ch
== '_') {
864 // permissive underscoring for number and vector literals
866 } else if (numState
== PERLNUM_DECIMAL
) {
867 if (sc
.ch
== 'E' || sc
.ch
== 'e') { // exponent, sign
868 numState
= PERLNUM_FLOAT_EXP
;
869 if (sc
.chNext
== '+' || sc
.chNext
== '-') {
873 } else if (IsADigit(sc
.ch
))
875 // number then word (go through)
876 } else if (numState
== PERLNUM_HEX
) {
877 if (sc
.ch
== 'P' || sc
.ch
== 'p') { // hex float exponent, sign
878 numState
= PERLNUM_FLOAT_EXP
;
879 if (sc
.chNext
== '+' || sc
.chNext
== '-') {
883 } else if (IsADigit(sc
.ch
, 16))
885 // hex or hex float then word (go through)
886 } else if (numState
== PERLNUM_VECTOR
|| numState
== PERLNUM_V_VECTOR
) {
887 if (IsADigit(sc
.ch
)) // vector
889 if (setWord
.Contains(sc
.ch
) && dotCount
== 0) { // change to word
890 sc
.ChangeState(SCE_PL_IDENTIFIER
);
893 // vector then word (go through)
894 } else if (IsADigit(sc
.ch
)) {
895 if (numState
== PERLNUM_FLOAT_EXP
) {
897 } else if (numState
== PERLNUM_OCTAL
) {
898 if (sc
.ch
<= '7') break;
899 } else if (numState
== PERLNUM_BINARY
) {
900 if (sc
.ch
<= '1') break;
902 // mark invalid octal, binary numbers (go through)
903 numState
= PERLNUM_BAD
;
906 // complete current number or vector
907 sc
.ChangeState(actualNumStyle(numState
));
908 sc
.SetState(SCE_PL_DEFAULT
);
910 case SCE_PL_COMMENTLINE
:
911 if (sc
.atLineStart
) {
912 sc
.SetState(SCE_PL_DEFAULT
);
915 case SCE_PL_HERE_DELIM
:
916 if (HereDoc
.State
== 0) { // '<<' encountered
917 int delim_ch
= sc
.chNext
;
918 Sci_Position ws_skip
= 0;
919 HereDoc
.State
= 1; // pre-init HERE doc class
920 HereDoc
.Quote
= sc
.chNext
;
921 HereDoc
.Quoted
= false;
922 HereDoc
.StripIndent
= false;
923 HereDoc
.DelimiterLength
= 0;
924 HereDoc
.Delimiter
[HereDoc
.DelimiterLength
] = '\0';
925 if (delim_ch
== '~') { // was actually '<<~'
927 HereDoc
.StripIndent
= true;
928 HereDoc
.Quote
= delim_ch
= sc
.chNext
;
930 if (IsASpaceOrTab(delim_ch
)) {
931 // skip whitespace; legal only for quoted delimiters
932 Sci_PositionU i
= sc
.currentPos
+ 1;
933 while ((i
< endPos
) && IsASpaceOrTab(delim_ch
)) {
935 delim_ch
= static_cast<unsigned char>(styler
.SafeGetCharAt(i
));
937 ws_skip
= i
- sc
.currentPos
- 1;
939 if (delim_ch
== '\'' || delim_ch
== '"' || delim_ch
== '`') {
940 // a quoted here-doc delimiter; skip any whitespace
941 sc
.Forward(ws_skip
+ 1);
942 HereDoc
.Quote
= delim_ch
;
943 HereDoc
.Quoted
= true;
944 } else if ((ws_skip
== 0 && setNonHereDoc
.Contains(sc
.chNext
))
946 // left shift << or <<= operator cases
947 // restore position if operator
948 sc
.ChangeState(SCE_PL_OPERATOR
);
949 sc
.ForwardSetState(SCE_PL_DEFAULT
);
950 backFlag
= BACK_OPERATOR
;
951 backPos
= sc
.currentPos
;
954 // specially handle initial '\' for identifier
955 if (ws_skip
== 0 && HereDoc
.Quote
== '\\')
957 // an unquoted here-doc delimiter, no special handling
958 // (cannot be prefixed by spaces/tabs), or
959 // symbols terminates; deprecated zero-length delimiter
961 } else if (HereDoc
.State
== 1) { // collect the delimiter
962 backFlag
= BACK_NONE
;
963 if (HereDoc
.Quoted
) { // a quoted here-doc delimiter
964 if (sc
.ch
== HereDoc
.Quote
) { // closing quote => end of delimiter
965 sc
.ForwardSetState(SCE_PL_DEFAULT
);
966 } else if (!sc
.atLineEnd
) {
967 if (sc
.Match('\\', static_cast<char>(HereDoc
.Quote
))) { // escaped quote
970 if (sc
.ch
!= '\r') { // skip CR if CRLF
971 int i
= 0; // else append char, possibly an extended char
972 while (i
< sc
.width
) {
973 HereDoc
.Append(static_cast<unsigned char>(styler
.SafeGetCharAt(sc
.currentPos
+ i
)));
978 } else { // an unquoted here-doc delimiter, no extended charsets
979 if (setHereDocDelim
.Contains(sc
.ch
)) {
980 HereDoc
.Append(sc
.ch
);
982 sc
.SetState(SCE_PL_DEFAULT
);
985 if (HereDoc
.DelimiterLength
>= HERE_DELIM_MAX
- 1) {
986 sc
.SetState(SCE_PL_ERROR
);
994 // also implies HereDoc.State == 2
996 if (HereDoc
.StripIndent
) {
998 while (IsASpaceOrTab(sc
.ch
) && !sc
.atLineEnd
)
1001 if (HereDoc
.DelimiterLength
== 0 || sc
.Match(HereDoc
.Delimiter
)) {
1002 int c
= sc
.GetRelative(HereDoc
.DelimiterLength
);
1003 if (c
== '\r' || c
== '\n') { // peek first, do not consume match
1004 sc
.ForwardBytes(HereDoc
.DelimiterLength
);
1005 sc
.SetState(SCE_PL_DEFAULT
);
1006 backFlag
= BACK_NONE
;
1013 if (sc
.state
== SCE_PL_HERE_Q
) { // \EOF and 'EOF' non-interpolated
1014 while (!sc
.atLineEnd
)
1018 while (!sc
.atLineEnd
) { // "EOF" and `EOF` interpolated
1019 int c
, sLen
= 0, endType
= 0;
1020 while ((c
= sc
.GetRelativeCharacter(sLen
)) != 0) {
1021 // scan to break string into segments
1024 } else if (c
== '\r' || c
== '\n') {
1029 if (sLen
> 0) // process non-empty segments
1030 InterpolateSegment(sc
, sLen
);
1033 // \ at end-of-line does not appear to have any effect, skip
1034 if (sc
.ch
!= '\r' && sc
.ch
!= '\n')
1036 } else if (endType
== 2) {
1043 case SCE_PL_POD_VERB
: {
1044 Sci_PositionU fw
= sc
.currentPos
;
1045 Sci_Position ln
= styler
.GetLine(fw
);
1046 if (sc
.atLineStart
&& sc
.Match("=cut")) { // end of POD
1047 sc
.SetState(SCE_PL_POD
);
1049 sc
.SetState(SCE_PL_DEFAULT
);
1050 styler
.SetLineState(ln
, SCE_PL_POD
);
1053 int pod
= podLineScan(styler
, fw
, endPos
); // classify POD line
1054 styler
.SetLineState(ln
, pod
);
1055 if (pod
== SCE_PL_DEFAULT
) {
1056 if (sc
.state
== SCE_PL_POD_VERB
) {
1057 Sci_PositionU fw2
= fw
;
1058 while (fw2
< (endPos
- 1) && pod
== SCE_PL_DEFAULT
) {
1059 fw
= fw2
++; // penultimate line (last blank line)
1060 pod
= podLineScan(styler
, fw2
, endPos
);
1061 styler
.SetLineState(styler
.GetLine(fw2
), pod
);
1063 if (pod
== SCE_PL_POD
) { // truncate verbatim POD early
1064 sc
.SetState(SCE_PL_POD
);
1069 if (pod
== SCE_PL_POD_VERB
// still part of current paragraph
1070 && (styler
.GetLineState(ln
- 1) == SCE_PL_POD
)) {
1072 styler
.SetLineState(ln
, pod
);
1073 } else if (pod
== SCE_PL_POD
1074 && (styler
.GetLineState(ln
- 1) == SCE_PL_POD_VERB
)) {
1075 pod
= SCE_PL_POD_VERB
;
1076 styler
.SetLineState(ln
, pod
);
1080 sc
.ForwardBytes(fw
- sc
.currentPos
); // commit style
1084 case SCE_PL_STRING_QR
:
1085 if (Quote
.Rep
<= 0) {
1086 if (!setModifiers
.Contains(sc
.ch
))
1087 sc
.SetState(SCE_PL_DEFAULT
);
1088 } else if (!Quote
.Up
&& !IsASpace(sc
.ch
)) {
1091 int c
, sLen
= 0, endType
= 0;
1092 while ((c
= sc
.GetRelativeCharacter(sLen
)) != 0) {
1093 // scan to break string into segments
1096 } else if (c
== '\\' && Quote
.Up
!= '\\') {
1098 } else if (c
== Quote
.Down
) {
1100 if (Quote
.Count
== 0) {
1104 } else if (c
== Quote
.Up
)
1108 if (sLen
> 0) { // process non-empty segments
1109 if (Quote
.Up
!= '\'') {
1110 InterpolateSegment(sc
, sLen
, true);
1111 } else // non-interpolated path
1118 case SCE_PL_REGSUBST
:
1120 if (Quote
.Rep
<= 0) {
1121 if (!setModifiers
.Contains(sc
.ch
))
1122 sc
.SetState(SCE_PL_DEFAULT
);
1123 } else if (!Quote
.Up
&& !IsASpace(sc
.ch
)) {
1126 int c
, sLen
= 0, endType
= 0;
1127 bool isPattern
= (Quote
.Rep
== 2);
1128 while ((c
= sc
.GetRelativeCharacter(sLen
)) != 0) {
1129 // scan to break string into segments
1130 if (c
== '\\' && Quote
.Up
!= '\\') {
1132 } else if (Quote
.Count
== 0 && Quote
.Rep
== 1) {
1133 // We matched something like s(...) or tr{...}, Perl 5.10
1134 // appears to allow almost any character for use as the
1135 // next delimiters. Whitespace and comments are accepted in
1136 // between, but we'll limit to whitespace here.
1137 // For '#', if no whitespace in between, it's a delimiter.
1140 } else if (c
== '#' && IsASpaceOrTab(sc
.GetRelativeCharacter(sLen
- 1))) {
1145 } else if (c
== Quote
.Down
) {
1147 if (Quote
.Count
== 0) {
1151 if (Quote
.Up
== Quote
.Down
)
1155 } else if (c
== Quote
.Up
) {
1157 } else if (IsASpace(c
))
1161 if (sLen
> 0) { // process non-empty segments
1162 if (sc
.state
== SCE_PL_REGSUBST
&& Quote
.Up
!= '\'') {
1163 InterpolateSegment(sc
, sLen
, isPattern
);
1164 } else // non-interpolated path
1169 } else if (endType
== 3)
1170 sc
.SetState(SCE_PL_DEFAULT
);
1173 case SCE_PL_STRING_Q
:
1174 case SCE_PL_STRING_QQ
:
1175 case SCE_PL_STRING_QX
:
1176 case SCE_PL_STRING_QW
:
1178 case SCE_PL_CHARACTER
:
1179 case SCE_PL_BACKTICKS
:
1180 if (!Quote
.Down
&& !IsASpace(sc
.ch
)) {
1183 int c
, sLen
= 0, endType
= 0;
1184 while ((c
= sc
.GetRelativeCharacter(sLen
)) != 0) {
1185 // scan to break string into segments
1188 } else if (c
== '\\' && Quote
.Up
!= '\\') {
1190 } else if (c
== Quote
.Down
) {
1192 if (Quote
.Count
== 0) {
1195 } else if (c
== Quote
.Up
)
1199 if (sLen
> 0) { // process non-empty segments
1202 case SCE_PL_STRING_QQ
:
1203 case SCE_PL_BACKTICKS
:
1204 InterpolateSegment(sc
, sLen
);
1206 case SCE_PL_STRING_QX
:
1207 if (Quote
.Up
!= '\'') {
1208 InterpolateSegment(sc
, sLen
);
1211 // (continued for ' delim)
1213 default: // non-interpolated path
1219 } else if (endType
== 3)
1220 sc
.ForwardSetState(SCE_PL_DEFAULT
);
1223 case SCE_PL_SUB_PROTOTYPE
: {
1225 // forward scan; must all be valid proto characters
1226 while (setSubPrototype
.Contains(sc
.GetRelative(i
)))
1228 if (sc
.GetRelative(i
) == ')') { // valid sub prototype
1230 sc
.ForwardSetState(SCE_PL_DEFAULT
);
1232 // abandon prototype, restart from '('
1233 sc
.ChangeState(SCE_PL_OPERATOR
);
1234 sc
.SetState(SCE_PL_DEFAULT
);
1238 case SCE_PL_FORMAT
: {
1240 if (sc
.Match('.')) {
1242 if (sc
.atLineEnd
|| ((sc
.ch
== '\r' && sc
.chNext
== '\n')))
1243 sc
.SetState(SCE_PL_DEFAULT
);
1245 while (!sc
.atLineEnd
)
1252 // Needed for specific continuation styles (one follows the other)
1254 // continued from SCE_PL_WORD
1255 case SCE_PL_FORMAT_IDENT
:
1256 // occupies HereDoc state 3 to avoid clashing with HERE docs
1257 if (IsASpaceOrTab(sc
.ch
)) { // skip whitespace
1258 sc
.ChangeState(SCE_PL_DEFAULT
);
1259 while (IsASpaceOrTab(sc
.ch
) && !sc
.atLineEnd
)
1261 sc
.SetState(SCE_PL_FORMAT_IDENT
);
1263 if (setFormatStart
.Contains(sc
.ch
)) { // identifier or '='
1267 } while (setFormat
.Contains(sc
.ch
));
1269 while (IsASpaceOrTab(sc
.ch
) && !sc
.atLineEnd
)
1272 sc
.ForwardSetState(SCE_PL_DEFAULT
);
1275 // invalid identifier; inexact fallback, but hey
1276 sc
.ChangeState(SCE_PL_IDENTIFIER
);
1277 sc
.SetState(SCE_PL_DEFAULT
);
1280 sc
.ChangeState(SCE_PL_DEFAULT
); // invalid identifier
1282 backFlag
= BACK_NONE
;
1286 // Must check end of HereDoc states here before default state is handled
1287 if (HereDoc
.State
== 1 && sc
.atLineEnd
) {
1288 // Begin of here-doc (the line after the here-doc delimiter):
1289 // Lexically, the here-doc starts from the next line after the <<, but the
1290 // first line of the here-doc seems to follow the style of the last EOL sequence
1291 int st_new
= SCE_PL_HERE_QQ
;
1293 if (HereDoc
.Quoted
) {
1294 if (sc
.state
== SCE_PL_HERE_DELIM
) {
1295 // Missing quote at end of string! We are stricter than perl.
1296 // Colour here-doc anyway while marking this bit as an error.
1297 sc
.ChangeState(SCE_PL_ERROR
);
1299 switch (HereDoc
.Quote
) {
1301 st_new
= SCE_PL_HERE_Q
;
1304 st_new
= SCE_PL_HERE_QQ
;
1307 st_new
= SCE_PL_HERE_QX
;
1311 if (HereDoc
.Quote
== '\\')
1312 st_new
= SCE_PL_HERE_Q
;
1314 sc
.SetState(st_new
);
1316 if (HereDoc
.State
== 3 && sc
.atLineEnd
) {
1317 // Start of format body.
1319 sc
.SetState(SCE_PL_FORMAT
);
1322 // Determine if a new state should be entered.
1323 if (sc
.state
== SCE_PL_DEFAULT
) {
1324 if (IsADigit(sc
.ch
) ||
1325 (IsADigit(sc
.chNext
) && (sc
.ch
== '.' || sc
.ch
== 'v'))) {
1326 sc
.SetState(SCE_PL_NUMBER
);
1327 backFlag
= BACK_NONE
;
1328 numState
= PERLNUM_DECIMAL
;
1330 if (sc
.ch
== '0') { // hex,bin,octal
1331 if (sc
.chNext
== 'x' || sc
.chNext
== 'X') {
1332 numState
= PERLNUM_HEX
;
1333 } else if (sc
.chNext
== 'b' || sc
.chNext
== 'B') {
1334 numState
= PERLNUM_BINARY
;
1335 } else if (IsADigit(sc
.chNext
)) {
1336 numState
= PERLNUM_OCTAL
;
1338 if (numState
!= PERLNUM_DECIMAL
) {
1341 } else if (sc
.ch
== 'v') { // vector
1342 numState
= PERLNUM_V_VECTOR
;
1344 } else if (setWord
.Contains(sc
.ch
)) {
1345 // if immediately prefixed by '::', always a bareword
1346 sc
.SetState(SCE_PL_WORD
);
1347 if (sc
.chPrev
== ':' && sc
.GetRelative(-2) == ':') {
1348 sc
.ChangeState(SCE_PL_IDENTIFIER
);
1350 Sci_PositionU bk
= sc
.currentPos
;
1351 Sci_PositionU fw
= sc
.currentPos
+ 1;
1352 // first check for possible quote-like delimiter
1353 if (sc
.ch
== 's' && !setWord
.Contains(sc
.chNext
)) {
1354 sc
.ChangeState(SCE_PL_REGSUBST
);
1356 } else if (sc
.ch
== 'm' && !setWord
.Contains(sc
.chNext
)) {
1357 sc
.ChangeState(SCE_PL_REGEX
);
1359 } else if (sc
.ch
== 'q' && !setWord
.Contains(sc
.chNext
)) {
1360 sc
.ChangeState(SCE_PL_STRING_Q
);
1362 } else if (sc
.ch
== 'y' && !setWord
.Contains(sc
.chNext
)) {
1363 sc
.ChangeState(SCE_PL_XLAT
);
1365 } else if (sc
.Match('t', 'r') && !setWord
.Contains(sc
.GetRelative(2))) {
1366 sc
.ChangeState(SCE_PL_XLAT
);
1370 } else if (sc
.ch
== 'q' && setQDelim
.Contains(sc
.chNext
)
1371 && !setWord
.Contains(sc
.GetRelative(2))) {
1372 if (sc
.chNext
== 'q') sc
.ChangeState(SCE_PL_STRING_QQ
);
1373 else if (sc
.chNext
== 'x') sc
.ChangeState(SCE_PL_STRING_QX
);
1374 else if (sc
.chNext
== 'r') sc
.ChangeState(SCE_PL_STRING_QR
);
1375 else sc
.ChangeState(SCE_PL_STRING_QW
); // sc.chNext == 'w'
1379 } else if (sc
.ch
== 'x' && (sc
.chNext
== '=' || // repetition
1380 !setWord
.Contains(sc
.chNext
) ||
1381 (setRepetition
.Contains(sc
.chPrev
) && IsADigit(sc
.chNext
)))) {
1382 sc
.ChangeState(SCE_PL_OPERATOR
);
1384 // if potentially a keyword, scan forward and grab word, then check
1385 // if it's really one; if yes, disambiguation test is performed
1386 // otherwise it is always a bareword and we skip a lot of scanning
1387 if (sc
.state
== SCE_PL_WORD
) {
1388 while (setWord
.Contains(static_cast<unsigned char>(styler
.SafeGetCharAt(fw
))))
1390 if (!isPerlKeyword(styler
.GetStartSegment(), fw
, keywords
, styler
)) {
1391 sc
.ChangeState(SCE_PL_IDENTIFIER
);
1394 // if already SCE_PL_IDENTIFIER, then no ambiguity, skip this
1395 // for quote-like delimiters/keywords, attempt to disambiguate
1396 // to select for bareword, change state -> SCE_PL_IDENTIFIER
1397 if (sc
.state
!= SCE_PL_IDENTIFIER
&& bk
> 0) {
1398 if (disambiguateBareword(styler
, bk
, fw
, backFlag
, backPos
, endPos
))
1399 sc
.ChangeState(SCE_PL_IDENTIFIER
);
1401 backFlag
= BACK_NONE
;
1402 } else if (sc
.ch
== '#') {
1403 sc
.SetState(SCE_PL_COMMENTLINE
);
1404 } else if (sc
.ch
== '\"') {
1405 sc
.SetState(SCE_PL_STRING
);
1408 backFlag
= BACK_NONE
;
1409 } else if (sc
.ch
== '\'') {
1410 if (sc
.chPrev
== '&' && setWordStart
.Contains(sc
.chNext
)) {
1412 sc
.SetState(SCE_PL_IDENTIFIER
);
1414 sc
.SetState(SCE_PL_CHARACTER
);
1418 backFlag
= BACK_NONE
;
1419 } else if (sc
.ch
== '`') {
1420 sc
.SetState(SCE_PL_BACKTICKS
);
1423 backFlag
= BACK_NONE
;
1424 } else if (sc
.ch
== '$') {
1425 sc
.SetState(SCE_PL_SCALAR
);
1426 if (sc
.chNext
== '{') {
1427 sc
.ForwardSetState(SCE_PL_OPERATOR
);
1428 } else if (IsASpace(sc
.chNext
)) {
1429 sc
.ForwardSetState(SCE_PL_DEFAULT
);
1432 if (sc
.Match('`', '`') || sc
.Match(':', ':')) {
1436 backFlag
= BACK_NONE
;
1437 } else if (sc
.ch
== '@') {
1438 sc
.SetState(SCE_PL_ARRAY
);
1439 if (setArray
.Contains(sc
.chNext
)) {
1440 // no special treatment
1441 } else if (sc
.chNext
== ':' && sc
.GetRelative(2) == ':') {
1443 } else if (sc
.chNext
== '{' || sc
.chNext
== '[') {
1444 sc
.ForwardSetState(SCE_PL_OPERATOR
);
1446 sc
.ChangeState(SCE_PL_OPERATOR
);
1448 backFlag
= BACK_NONE
;
1449 } else if (setPreferRE
.Contains(sc
.ch
)) {
1450 // Explicit backward peeking to set a consistent preferRE for
1451 // any slash found, so no longer need to track preferRE state.
1452 // Find first previous significant lexed element and interpret.
1453 // A few symbols share this code for disambiguation.
1454 bool preferRE
= false;
1455 bool isHereDoc
= sc
.Match('<', '<');
1456 bool hereDocSpace
= false; // for: SCALAR [whitespace] '<<'
1457 Sci_PositionU bk
= (sc
.currentPos
> 0) ? sc
.currentPos
- 1: 0;
1460 if (styler
.StyleAt(bk
) == SCE_PL_DEFAULT
)
1461 hereDocSpace
= true;
1462 skipWhitespaceComment(styler
, bk
);
1464 // avoid backward scanning breakage
1467 int bkstyle
= styler
.StyleAt(bk
);
1468 int bkch
= static_cast<unsigned char>(styler
.SafeGetCharAt(bk
));
1470 case SCE_PL_OPERATOR
:
1472 if (bkch
== ')' || bkch
== ']') {
1474 } else if (bkch
== '}') {
1475 // backtrack by counting balanced brace pairs
1476 // needed to test for variables like ${}, @{} etc.
1477 bkstyle
= styleBeforeBracePair(styler
, bk
);
1478 if (bkstyle
== SCE_PL_SCALAR
1479 || bkstyle
== SCE_PL_ARRAY
1480 || bkstyle
== SCE_PL_HASH
1481 || bkstyle
== SCE_PL_SYMBOLTABLE
1482 || bkstyle
== SCE_PL_OPERATOR
) {
1485 } else if (bkch
== '+' || bkch
== '-') {
1486 if (bkch
== static_cast<unsigned char>(styler
.SafeGetCharAt(bk
- 1))
1487 && bkch
!= static_cast<unsigned char>(styler
.SafeGetCharAt(bk
- 2)))
1488 // exceptions for operators: unary suffixes ++, --
1492 case SCE_PL_IDENTIFIER
:
1494 bkstyle
= styleCheckIdentifier(styler
, bk
);
1495 if ((bkstyle
== 1) || (bkstyle
== 2)) {
1496 // inputsymbol or var with "->" or "::" before identifier
1498 } else if (bkstyle
== 3) {
1499 // bare identifier, test cases follow:
1501 // if '/', /PATTERN/ unless digit/space immediately after '/'
1502 // if '//', always expect defined-or operator to follow identifier
1503 if (IsASpace(sc
.chNext
) || IsADigit(sc
.chNext
) || sc
.chNext
== '/')
1505 } else if (sc
.ch
== '*' || sc
.ch
== '%') {
1506 if (IsASpace(sc
.chNext
) || IsADigit(sc
.chNext
) || sc
.Match('*', '*'))
1508 } else if (sc
.ch
== '<') {
1509 if (IsASpace(sc
.chNext
) || sc
.chNext
== '=')
1514 case SCE_PL_SCALAR
: // for $var<< case:
1515 if (isHereDoc
&& hereDocSpace
) // if SCALAR whitespace '<<', *always* a HERE doc
1520 // for HERE docs, always true
1522 // adopt heuristics similar to vim-style rules:
1523 // keywords always forced as /PATTERN/: split, if, elsif, while
1524 // everything else /PATTERN/ unless digit/space immediately after '/'
1525 // for '//', defined-or favoured unless special keywords
1526 Sci_PositionU bkend
= bk
+ 1;
1527 while (bk
> 0 && styler
.StyleAt(bk
- 1) == SCE_PL_WORD
) {
1530 if (isPerlKeyword(bk
, bkend
, reWords
, styler
))
1532 if (IsASpace(sc
.chNext
) || IsADigit(sc
.chNext
) || sc
.chNext
== '/')
1534 } else if (sc
.ch
== '*' || sc
.ch
== '%') {
1535 if (IsASpace(sc
.chNext
) || IsADigit(sc
.chNext
) || sc
.Match('*', '*'))
1537 } else if (sc
.ch
== '<') {
1538 if (IsASpace(sc
.chNext
) || sc
.chNext
== '=')
1543 // other styles use the default, preferRE=false
1546 case SCE_PL_HERE_QQ
:
1547 case SCE_PL_HERE_QX
:
1552 backFlag
= BACK_NONE
;
1553 if (isHereDoc
) { // handle '<<', HERE doc
1554 if (sc
.Match("<<>>")) { // double-diamond operator (5.22)
1555 sc
.SetState(SCE_PL_OPERATOR
);
1557 } else if (preferRE
) {
1558 sc
.SetState(SCE_PL_HERE_DELIM
);
1560 } else { // << operator
1561 sc
.SetState(SCE_PL_OPERATOR
);
1564 } else if (sc
.ch
== '*') { // handle '*', typeglob
1566 sc
.SetState(SCE_PL_SYMBOLTABLE
);
1567 if (sc
.chNext
== ':' && sc
.GetRelative(2) == ':') {
1569 } else if (sc
.chNext
== '{') {
1570 sc
.ForwardSetState(SCE_PL_OPERATOR
);
1575 sc
.SetState(SCE_PL_OPERATOR
);
1576 if (sc
.chNext
== '*') // exponentiation
1579 } else if (sc
.ch
== '%') { // handle '%', hash
1581 sc
.SetState(SCE_PL_HASH
);
1582 if (setHash
.Contains(sc
.chNext
)) {
1584 } else if (sc
.chNext
== ':' && sc
.GetRelative(2) == ':') {
1586 } else if (sc
.chNext
== '{') {
1587 sc
.ForwardSetState(SCE_PL_OPERATOR
);
1589 sc
.ChangeState(SCE_PL_OPERATOR
);
1592 sc
.SetState(SCE_PL_OPERATOR
);
1594 } else if (sc
.ch
== '<') { // handle '<', inputsymbol
1597 int i
= InputSymbolScan(sc
);
1599 sc
.SetState(SCE_PL_IDENTIFIER
);
1602 sc
.SetState(SCE_PL_OPERATOR
);
1605 sc
.SetState(SCE_PL_OPERATOR
);
1607 } else { // handle '/', regexp
1609 sc
.SetState(SCE_PL_REGEX
);
1612 } else { // / and // operators
1613 sc
.SetState(SCE_PL_OPERATOR
);
1614 if (sc
.chNext
== '/') {
1619 } else if (sc
.ch
== '=' // POD
1620 && setPOD
.Contains(sc
.chNext
)
1621 && sc
.atLineStart
) {
1622 sc
.SetState(SCE_PL_POD
);
1623 backFlag
= BACK_NONE
;
1624 } else if (sc
.ch
== '-' && setWordStart
.Contains(sc
.chNext
)) { // extended '-' cases
1625 Sci_PositionU bk
= sc
.currentPos
;
1626 Sci_PositionU fw
= 2;
1627 if (setSingleCharOp
.Contains(sc
.chNext
) && // file test operators
1628 !setWord
.Contains(sc
.GetRelative(2))) {
1629 sc
.SetState(SCE_PL_WORD
);
1631 // nominally a minus and bareword; find extent of bareword
1632 while (setWord
.Contains(sc
.GetRelative(fw
)))
1634 sc
.SetState(SCE_PL_OPERATOR
);
1636 // force to bareword for hash key => or {variable literal} cases
1637 if (disambiguateBareword(styler
, bk
, bk
+ fw
, backFlag
, backPos
, endPos
) & 2) {
1638 sc
.ChangeState(SCE_PL_IDENTIFIER
);
1640 backFlag
= BACK_NONE
;
1641 } else if (sc
.ch
== '(' && sc
.currentPos
> 0) { // '(' or subroutine prototype
1643 if (styleCheckSubPrototype(styler
, sc
.currentPos
- 1)) {
1644 sc
.SetState(SCE_PL_SUB_PROTOTYPE
);
1645 backFlag
= BACK_NONE
;
1647 sc
.SetState(SCE_PL_OPERATOR
);
1649 } else if (setPerlOperator
.Contains(sc
.ch
)) { // operators
1650 sc
.SetState(SCE_PL_OPERATOR
);
1651 if (sc
.Match('.', '.')) { // .. and ...
1653 if (sc
.chNext
== '.') sc
.Forward();
1655 } else if (sc
.ch
== 4 || sc
.ch
== 26) { // ^D and ^Z ends valid perl source
1656 sc
.SetState(SCE_PL_DATASECTION
);
1658 // keep colouring defaults
1664 if (sc
.state
== SCE_PL_HERE_Q
1665 || sc
.state
== SCE_PL_HERE_QQ
1666 || sc
.state
== SCE_PL_HERE_QX
1667 || sc
.state
== SCE_PL_FORMAT
) {
1668 styler
.ChangeLexerState(sc
.currentPos
, styler
.Length());
// Bit field inside a fold level that records the POD heading level:
// Fold() stores (podHeading << PERL_HEADFOLD_SHIFT) into the bits selected by
// PERL_HEADFOLD_MASK and clears those bits again when it sees "=cut".
1673 #define PERL_HEADFOLD_SHIFT 4
1674 #define PERL_HEADFOLD_MASK 0xF0
// Computes fold levels for the lines touched by the range that starts at
// startPos and spans length characters, and stores them with styler.SetLevel().
//   startPos  - first position to examine (may be moved back, see below)
//   length    - number of characters to examine from startPos
//   (int)     - third parameter (initStyle) is left unnamed and is not used
//   pAccess   - document being folded; accessed through a LexAccessor
// NOTE(review): `ch` and `podHeading` are used below without a visible
// declaration, so some statements appear to be absent from this listing.
// The comments here describe only the statements that are visible.
1676 void SCI_METHOD
LexerPerl::Fold(Sci_PositionU startPos
, Sci_Position length
, int /* initStyle */, IDocument
*pAccess
) {
1681 LexAccessor
styler(pAccess
);
// endPos is one past the last position examined by the main loop.
1683 Sci_PositionU endPos
= startPos
+ length
;
1684 int visibleChars
= 0;
1685 Sci_Position lineCurrent
= styler
.GetLine(startPos
);
1687 // Backtrack to previous line in case need to fix its fold status
1689 if (lineCurrent
> 0) {
1691 startPos
= styler
.LineStart(lineCurrent
);
// levelPrev starts at the base level, or, when there is a previous line, at
// the value held in the upper 16 bits of that line's stored level (the same
// place where `lev |= levelCurrent << 16` below puts it).
1695 int levelPrev
= SC_FOLDLEVELBASE
;
1696 if (lineCurrent
> 0)
1697 levelPrev
= styler
.LevelAt(lineCurrent
- 1) >> 16;
1698 int levelCurrent
= levelPrev
;
// Prime the one-character / one-style lookahead that the loop advances.
1699 char chNext
= styler
[startPos
];
1700 char chPrev
= styler
.SafeGetCharAt(startPos
- 1);
1701 int styleNext
= styler
.StyleAt(startPos
);
1702 // Used at end of line to determine if the line was a package definition
1703 bool isPackageLine
= false;
// Visit every position in [startPos, endPos).
1705 for (Sci_PositionU i
= startPos
; i
< endPos
; i
++) {
1707 chNext
= styler
.SafeGetCharAt(i
+ 1);
1708 int style
= styleNext
;
1709 styleNext
= styler
.StyleAt(i
+ 1);
// Style of the preceding character; SCE_PL_DEFAULT is used at position 0.
1710 int stylePrevCh
= (i
) ? styler
.StyleAt(i
- 1):SCE_PL_DEFAULT
;
// atEOL: a '\n', or a '\r' that is not followed by '\n'.
1711 bool atEOL
= (ch
== '\r' && chNext
!= '\n') || (ch
== '\n');
// atLineStart: the previous character was '\r' or '\n', or this is position 0.
1712 bool atLineStart
= ((chPrev
== '\r') || (chPrev
== '\n')) || i
== 0;
// Comment folding: considered at end of line when the current line is a
// comment line. The first test matches a line whose predecessor is not a
// comment line but whose successor is; the second matches the reverse.
1714 if (options
.foldComment
&& atEOL
&& IsCommentLine(lineCurrent
, styler
)) {
1715 if (!IsCommentLine(lineCurrent
- 1, styler
)
1716 && IsCommentLine(lineCurrent
+ 1, styler
))
1718 else if (IsCommentLine(lineCurrent
- 1, styler
)
1719 && !IsCommentLine(lineCurrent
+ 1, styler
))
1722 // {} [] block folding
1723 if (style
== SCE_PL_OPERATOR
) {
1725 if (options
.foldAtElse
&& levelCurrent
< levelPrev
)
1728 } else if (ch
== '}') {
1732 if (options
.foldAtElse
&& levelCurrent
< levelPrev
)
1735 } else if (ch
== ']') {
1738 } else if (style
== SCE_PL_STRING_QW
) {
1740 if (stylePrevCh
!= style
)
1742 else if (styleNext
!= style
)
// POD folding: only evaluated for the first character of a line.
// "=cut" clears the PERL_HEADFOLD_MASK bits of levelCurrent and subtracts 1;
// "=head" records the value returned by PodHeadingLevel() in podHeading.
1746 if (options
.foldPOD
&& atLineStart
) {
1747 if (style
== SCE_PL_POD
) {
1748 if (stylePrevCh
!= SCE_PL_POD
&& stylePrevCh
!= SCE_PL_POD_VERB
)
1750 else if (styler
.Match(i
, "=cut"))
1751 levelCurrent
= (levelCurrent
& ~PERL_HEADFOLD_MASK
) - 1;
1752 else if (styler
.Match(i
, "=head"))
1753 podHeading
= PodHeadingLevel(i
, styler
);
1754 } else if (style
== SCE_PL_DATASECTION
) {
1755 if (ch
== '=' && IsASCII(chNext
) && isalpha(chNext
) && levelCurrent
== SC_FOLDLEVELBASE
)
1757 else if (styler
.Match(i
, "=cut") && levelCurrent
> SC_FOLDLEVELBASE
)
1758 levelCurrent
= (levelCurrent
& ~PERL_HEADFOLD_MASK
) - 1;
1759 else if (styler
.Match(i
, "=head"))
1760 podHeading
= PodHeadingLevel(i
, styler
);
1761 // if package used or unclosed brace, level > SC_FOLDLEVELBASE!
1762 // reset needed as level test is vs. SC_FOLDLEVELBASE
1763 else if (stylePrevCh
!= SCE_PL_DATASECTION
)
1764 levelCurrent
= SC_FOLDLEVELBASE
;
// Package folding: at the start of a line, remember a package line that is
// not immediately followed by another package line.
1768 if (options
.foldPackage
&& atLineStart
) {
1769 if (IsPackageLine(lineCurrent
, styler
)
1770 && !IsPackageLine(lineCurrent
+ 1, styler
))
1771 isPackageLine
= true;
// Here-doc styles: the style of the previous character is inspected for the
// same three here-doc styles.
1776 case SCE_PL_HERE_QQ
:
1777 case SCE_PL_HERE_Q
:
1778 case SCE_PL_HERE_QX
:
1779 switch (stylePrevCh
) {
1780 case SCE_PL_HERE_QQ
:
1781 case SCE_PL_HERE_Q
:
1782 case SCE_PL_HERE_QX
:
1791 switch (stylePrevCh
) {
1792 case SCE_PL_HERE_QQ
:
1793 case SCE_PL_HERE_Q
:
1794 case SCE_PL_HERE_QX
:
// Explicit fold markers in line comments: a '#' followed by '{', or by '}'
// when levelCurrent is above the base level.
1805 if (options
.foldCommentExplicit
&& style
== SCE_PL_COMMENTLINE
&& ch
== '#') {
1806 if (chNext
== '{') {
1808 } else if (levelCurrent
> SC_FOLDLEVELBASE
&& chNext
== '}') {
// Build the value to store for lineCurrent, starting from levelPrev.
1814 int lev
= levelPrev
;
1815 // POD headings occupy bits 7-4, leaving some breathing room for
1816 // non-standard practice -- POD sections stuck in blocks, etc.
1817 if (podHeading
> 0) {
1818 levelCurrent
= (lev
& ~PERL_HEADFOLD_MASK
) | (podHeading
<< PERL_HEADFOLD_SHIFT
);
1819 lev
= levelCurrent
- 1;
1820 lev
|= SC_FOLDLEVELHEADERFLAG
;
1823 // Check if line was a package declaration
1824 // because packages need "special" treatment
1825 if (isPackageLine
) {
1826 lev
= SC_FOLDLEVELBASE
| SC_FOLDLEVELHEADERFLAG
;
1827 levelCurrent
= SC_FOLDLEVELBASE
+ 1;
1828 isPackageLine
= false;
// The level in effect after this line is kept in the upper 16 bits.
1830 lev
|= levelCurrent
<< 16;
// With foldCompact set, a line with no visible characters gets the white flag.
1831 if (visibleChars
== 0 && options
.foldCompact
)
1832 lev
|= SC_FOLDLEVELWHITEFLAG
;
// A line that raises the level and has visible characters is a fold header.
1833 if ((levelCurrent
> levelPrev
) && (visibleChars
> 0))
1834 lev
|= SC_FOLDLEVELHEADERFLAG
;
// Write the level only when it differs from the stored one.
1835 if (lev
!= styler
.LevelAt(lineCurrent
)) {
1836 styler
.SetLevel(lineCurrent
, lev
);
1839 levelPrev
= levelCurrent
;
1842 if (!isspacechar(ch
))
1846 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1847 int flagsNext
= styler
.LevelAt(lineCurrent
) & ~SC_FOLDLEVELNUMBERMASK
;
1848 styler
.SetLevel(lineCurrent
, levelPrev
| flagsNext
);
// LexerModule object for Perl, constructed from SCLEX_PERL, the
// LexerPerl::LexerFactoryPerl factory, the name "perl" and perlWordListDesc.
1851 LexerModule
lmPerl(SCLEX_PERL
, LexerPerl::LexerFactoryPerl
, "perl", perlWordListDesc
);