scintilla/lexilla/lexers/LexPerl.cxx

   1 // Scintilla source code edit control
   2 /** @file LexPerl.cxx
   3  ** Lexer for Perl.
   4  ** Converted to lexer object by "Udo Lechner" <dlchnr(at)gmx(dot)net>
   5  **/
   6 // Copyright 1998-2008 by Neil Hodgson <neilh@scintilla.org>
   7 // Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my>
   8 // The License.txt file describes the conditions under which this software may be distributed.
   9
  10 #include <stdlib.h>
  11 #include <string.h>
  12 #include <stdio.h>
  13 #include <stdarg.h>
  14 #include <assert.h>
  15 #include <ctype.h>
  16
  17 #include <string>
  18 #include <string_view>
  19 #include <map>
  20 #include <functional>
  21
  22 #include "ILexer.h"
  23 #include "Scintilla.h"
  24 #include "SciLexer.h"
  25
  26 #include "WordList.h"
  27 #include "LexAccessor.h"
  28 #include "StyleContext.h"
  29 #include "CharacterSet.h"
  30 #include "LexerModule.h"
  31 #include "OptionSet.h"
  32 #include "DefaultLexer.h"
  33
  34 using namespace Scintilla;
  35 using namespace Lexilla;
  36
  37 // Info for HERE document handling from perldata.pod (reformatted):
  38 // ----------------------------------------------------------------
  39 // A line-oriented form of quoting is based on the shell ``here-doc'' syntax.
  40 // Following a << you specify a string to terminate the quoted material, and
  41 // all lines following the current line down to the terminating string are
  42 // the value of the item.
  43 // Prefixing the terminating string with a "~" specifies that you want to
  44 // use "Indented Here-docs" (see below).
  45 // * The terminating string may be either an identifier (a word), or some
  46 //   quoted text.
  47 // * If quoted, the type of quotes you use determines the treatment of the
  48 //   text, just as in regular quoting.
  49 // * An unquoted identifier works like double quotes.
  50 // * There must be no space between the << and the identifier.
  51 //   (If you put a space it will be treated as a null identifier,
  52 //    which is valid, and matches the first empty line.)
  53 //   (This is deprecated, -w warns of this syntax)
  54 // * The terminating string must appear by itself (unquoted and
  55 //   with no surrounding whitespace) on the terminating line.
  56 //
  57 // Indented Here-docs
  58 // ------------------
  59 // The here-doc modifier "~" allows you to indent your here-docs to
  60 // make the code more readable.
  61 // The delimiter is used to determine the exact whitespace to remove
  62 // from the beginning of each line. All lines must have at least the
  63 // same starting whitespace (except lines only containing a newline)
  64 // or perl will croak. Tabs and spaces can be mixed, but are matched
  65 // exactly. One tab will not be equal to 8 spaces!
  66 // Additional beginning whitespace (beyond what preceded the
  67 // delimiter) will be preserved.
  68
// Size of the buffer that holds a HERE doc delimiter (see HereDocCls::Delimiter).
#define HERE_DELIM_MAX 256              // maximum length of HERE doc delimiter

// Sub-states used while lexing a numeric literal. The numeric values matter:
// the comments below describe ranges of these values.
#define PERLNUM_BINARY          1       // order is significant: 1-3 cannot have a dot
#define PERLNUM_OCTAL           2
#define PERLNUM_FLOAT_EXP       3       // exponent part only
#define PERLNUM_HEX                     4       // may be a hex float
#define PERLNUM_DECIMAL         5       // 1-5 are numbers; 6-7 are strings
#define PERLNUM_VECTOR          6       // styled as a string by actualNumStyle()
#define PERLNUM_V_VECTOR        7       // styled as a string by actualNumStyle()
#define PERLNUM_BAD                     8       // styled as an error by actualNumStyle()

// Values of the backFlag argument of disambiguateBareword().
#define BACK_NONE               0       // lookback state for bareword disambiguation:
#define BACK_OPERATOR   1       // whitespace/comments are insignificant
#define BACK_KEYWORD    2       // operators/keywords are needed for disambiguation

// States of the backward scan performed by styleCheckSubPrototype().
#define SUB_BEGIN               0       // states for subroutine prototype scan:
#define SUB_HAS_PROTO   1       // only 'prototype' attribute allows prototypes
#define SUB_HAS_ATTRIB  2       // other attributes can exist leftward
#define SUB_HAS_MODULE  3       // sub name can have a ::identifier part
#define SUB_HAS_SUB             4       // 'sub' keyword

// all interpolated styles are different from their parent styles by a constant difference
// we also assume SCE_PL_STRING_VAR is the interpolated style with the smallest value
// (InterpolateSegment() adds or subtracts this shift to enter or leave an interpolated style)
#define INTERPOLATE_SHIFT       (SCE_PL_STRING_VAR - SCE_PL_STRING)
  93
  94 static bool isPerlKeyword(Sci_PositionU start, Sci_PositionU end, WordList &keywords, LexAccessor &styler) {
  95         // old-style keyword matcher; needed because GetCurrent() needs
  96         // current segment to be committed, but we may abandon early...
  97         char s[100];
  98         Sci_PositionU i, len = end - start;
  99         if (len > 30) { len = 30; }
 100         for (i = 0; i < len; i++, start++) s[i] = styler[start];
 101         s[i] = '\0';
 102         return keywords.InList(s);
 103 }
 104
 105 static int disambiguateBareword(LexAccessor &styler, Sci_PositionU bk, Sci_PositionU fw,
 106         int backFlag, Sci_PositionU backPos, Sci_PositionU endPos) {
 107         // identifiers are recognized by Perl as barewords under some
 108         // conditions, the following attempts to do the disambiguation
 109         // by looking backward and forward; result in 2 LSB
 110         int result = 0;
 111         bool moreback = false;          // true if passed newline/comments
 112         bool brace = false;                     // true if opening brace found
 113         // if BACK_NONE, neither operator nor keyword, so skip test
 114         if (backFlag == BACK_NONE)
 115                 return result;
 116         // first look backwards past whitespace/comments to set EOL flag
 117         // (some disambiguation patterns must be on a single line)
 118         if (backPos <= static_cast<Sci_PositionU>(styler.LineStart(styler.GetLine(bk))))
 119                 moreback = true;
 120         // look backwards at last significant lexed item for disambiguation
 121         bk = backPos - 1;
 122         int ch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
 123         if (ch == '{' && !moreback) {
 124                 // {bareword: possible variable spec
 125                 brace = true;
 126         } else if ((ch == '&' && styler.SafeGetCharAt(bk - 1) != '&')
 127                 // &bareword: subroutine call
 128                 || styler.Match(bk - 1, "->")
 129                 // ->bareword: part of variable spec
 130                 || styler.Match(bk - 1, "::")
 131                 // ::bareword: part of module spec
 132                 || styler.Match(bk - 2, "sub")) {
 133                 // sub bareword: subroutine declaration
 134                 // (implied BACK_KEYWORD, no keywords end in 'sub'!)
 135                 result |= 1;
 136         }
 137         // next, scan forward after word past tab/spaces only;
 138         // if ch isn't one of '[{(,' we can skip the test
 139         if ((ch == '{' || ch == '(' || ch == '['|| ch == ',')
 140                 && fw < endPos) {
 141                 while (IsASpaceOrTab(ch = static_cast<unsigned char>(styler.SafeGetCharAt(fw)))
 142                         && fw < endPos) {
 143                         fw++;
 144                 }
 145                 if ((ch == '}' && brace)
 146                         // {bareword}: variable spec
 147                         || styler.Match(fw, "=>")) {
 148                         // [{(, bareword=>: hash literal
 149                         result |= 2;
 150                 }
 151         }
 152         return result;
 153 }
 154
 155 static void skipWhitespaceComment(LexAccessor &styler, Sci_PositionU &p) {
 156         // when backtracking, we need to skip whitespace and comments
 157         while (p > 0) {
 158                 const int style = styler.StyleAt(p);
 159                 if (style != SCE_PL_DEFAULT && style != SCE_PL_COMMENTLINE)
 160                         break;
 161                 p--;
 162         }
 163 }
 164
 165 static int findPrevLexeme(LexAccessor &styler, Sci_PositionU &bk, int &style) {
 166         // scan backward past whitespace and comments to find a lexeme
 167         skipWhitespaceComment(styler, bk);
 168         if (bk == 0)
 169                 return 0;
 170         int sz = 1;
 171         style = styler.StyleAt(bk);
 172         while (bk > 0) {        // find extent of lexeme
 173                 if (styler.StyleAt(bk - 1) == style) {
 174                         bk--; sz++;
 175                 } else
 176                         break;
 177         }
 178         return sz;
 179 }
 180
 181 static int styleBeforeBracePair(LexAccessor &styler, Sci_PositionU bk) {
 182         // backtrack to find open '{' corresponding to a '}', balanced
 183         // return significant style to be tested for '/' disambiguation
 184         int braceCount = 1;
 185         if (bk == 0)
 186                 return SCE_PL_DEFAULT;
 187         while (--bk > 0) {
 188                 if (styler.StyleAt(bk) == SCE_PL_OPERATOR) {
 189                         int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
 190                         if (bkch == ';') {      // early out
 191                                 break;
 192                         } else if (bkch == '}') {
 193                                 braceCount++;
 194                         } else if (bkch == '{') {
 195                                 if (--braceCount == 0) break;
 196                         }
 197                 }
 198         }
 199         if (bk > 0 && braceCount == 0) {
 200                 // balanced { found, bk > 0, skip more whitespace/comments
 201                 bk--;
 202                 skipWhitespaceComment(styler, bk);
 203                 return styler.StyleAt(bk);
 204         }
 205         return SCE_PL_DEFAULT;
 206 }
 207
 208 static int styleCheckIdentifier(LexAccessor &styler, Sci_PositionU bk) {
 209         // backtrack to classify sub-styles of identifier under test
 210         // return sub-style to be tested for '/' disambiguation
 211         if (styler.SafeGetCharAt(bk) == '>')    // inputsymbol, like <foo>
 212                 return 1;
 213         // backtrack to check for possible "->" or "::" before identifier
 214         while (bk > 0 && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) {
 215                 bk--;
 216         }
 217         while (bk > 0) {
 218                 int bkstyle = styler.StyleAt(bk);
 219                 if (bkstyle == SCE_PL_DEFAULT
 220                         || bkstyle == SCE_PL_COMMENTLINE) {
 221                         // skip whitespace, comments
 222                 } else if (bkstyle == SCE_PL_OPERATOR) {
 223                         // test for "->" and "::"
 224                         if (styler.Match(bk - 1, "->") || styler.Match(bk - 1, "::"))
 225                                 return 2;
 226                 } else
 227                         return 3;       // bare identifier
 228                 bk--;
 229         }
 230         return 0;
 231 }
 232
 233 static int podLineScan(LexAccessor &styler, Sci_PositionU &pos, Sci_PositionU endPos) {
 234         // forward scan the current line to classify line for POD style
 235         int state = -1;
 236         while (pos < endPos) {
 237                 int ch = static_cast<unsigned char>(styler.SafeGetCharAt(pos));
 238                 if (ch == '\n' || ch == '\r') {
 239                         if (ch == '\r' && styler.SafeGetCharAt(pos + 1) == '\n') pos++;
 240                         break;
 241                 }
 242                 if (IsASpaceOrTab(ch)) {        // whitespace, take note
 243                         if (state == -1)
 244                                 state = SCE_PL_DEFAULT;
 245                 } else if (state == SCE_PL_DEFAULT) {   // verbatim POD line
 246                         state = SCE_PL_POD_VERB;
 247                 } else if (state != SCE_PL_POD_VERB) {  // regular POD line
 248                         state = SCE_PL_POD;
 249                 }
 250                 pos++;
 251         }
 252         if (state == -1)
 253                 state = SCE_PL_DEFAULT;
 254         return state;
 255 }
 256
 257 static bool styleCheckSubPrototype(LexAccessor &styler, Sci_PositionU bk) {
 258         // backtrack to identify if we're starting a subroutine prototype
 259         // we also need to ignore whitespace/comments, format is like:
 260         //     sub abc::pqr :const :prototype(...)
 261         // lexemes are tested in pairs, e.g. '::'+'pqr', ':'+'const', etc.
 262         // and a state machine generates legal subroutine syntax matches
 263         styler.Flush();
 264         int state = SUB_BEGIN;
 265         do {
 266                 // find two lexemes, lexeme 2 follows lexeme 1
 267                 int style2 = SCE_PL_DEFAULT;
 268                 Sci_PositionU pos2 = bk;
 269                 int len2 = findPrevLexeme(styler, pos2, style2);
 270                 int style1 = SCE_PL_DEFAULT;
 271                 Sci_PositionU pos1 = pos2;
 272                 if (pos1 > 0) pos1--;
 273                 int len1 = findPrevLexeme(styler, pos1, style1);
 274                 if (len1 == 0 || len2 == 0)             // lexeme pair must exist
 275                         break;
 276
 277                 // match parts of syntax, if invalid subroutine syntax, break off
 278                 if (style1 == SCE_PL_OPERATOR && len1 == 1 &&
 279                     styler.SafeGetCharAt(pos1) == ':') {        // ':'
 280                         if (style2 == SCE_PL_IDENTIFIER || style2 == SCE_PL_WORD) {
 281                                 if (len2 == 9 && styler.Match(pos2, "prototype")) {     // ':' 'prototype'
 282                                         if (state == SUB_BEGIN) {
 283                                                 state = SUB_HAS_PROTO;
 284                                         } else
 285                                                 break;
 286                                 } else {        // ':' <attribute>
 287                                         if (state == SUB_HAS_PROTO || state == SUB_HAS_ATTRIB) {
 288                                                 state = SUB_HAS_ATTRIB;
 289                                         } else
 290                                                 break;
 291                                 }
 292                         } else
 293                                 break;
 294                 } else if (style1 == SCE_PL_OPERATOR && len1 == 2 &&
 295                            styler.Match(pos1, "::")) {  // '::'
 296                         if (style2 == SCE_PL_IDENTIFIER) {      // '::' <identifier>
 297                                 state = SUB_HAS_MODULE;
 298                         } else
 299                                 break;
 300                 } else if (style1 == SCE_PL_WORD && len1 == 3 &&
 301                            styler.Match(pos1, "sub")) { // 'sub'
 302                         if (style2 == SCE_PL_IDENTIFIER) {      // 'sub' <identifier>
 303                                 state = SUB_HAS_SUB;
 304                         } else
 305                                 break;
 306                 } else
 307                         break;
 308                 bk = pos1;                      // set position for finding next lexeme pair
 309                 if (bk > 0) bk--;
 310         } while (state != SUB_HAS_SUB);
 311         return (state == SUB_HAS_SUB);
 312 }
 313
 314 static int actualNumStyle(int numberStyle) {
 315         if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) {
 316                 return SCE_PL_STRING;
 317         } else if (numberStyle == PERLNUM_BAD) {
 318                 return SCE_PL_ERROR;
 319         }
 320         return SCE_PL_NUMBER;
 321 }
 322
 323 static int opposite(int ch) {
 324         if (ch == '(') return ')';
 325         if (ch == '[') return ']';
 326         if (ch == '{') return '}';
 327         if (ch == '<') return '>';
 328         return ch;
 329 }
 330
 331 static bool IsCommentLine(Sci_Position line, LexAccessor &styler) {
 332         Sci_Position pos = styler.LineStart(line);
 333         Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
 334         for (Sci_Position i = pos; i < eol_pos; i++) {
 335                 char ch = styler[i];
 336                 int style = styler.StyleAt(i);
 337                 if (ch == '#' && style == SCE_PL_COMMENTLINE)
 338                         return true;
 339                 else if (!IsASpaceOrTab(ch))
 340                         return false;
 341         }
 342         return false;
 343 }
 344
 345 static bool IsPackageLine(Sci_Position line, LexAccessor &styler) {
 346         Sci_Position pos = styler.LineStart(line);
 347         int style = styler.StyleAt(pos);
 348         if (style == SCE_PL_WORD && styler.Match(pos, "package")) {
 349                 return true;
 350         }
 351         return false;
 352 }
 353
 354 static int PodHeadingLevel(Sci_Position pos, LexAccessor &styler) {
 355         int lvl = static_cast<unsigned char>(styler.SafeGetCharAt(pos + 5));
 356         if (lvl >= '1' && lvl <= '4') {
 357                 return lvl - '0';
 358         }
 359         return 0;
 360 }
 361
 362 // An individual named option for use in an OptionSet
 363
 364 // Options used for LexerPerl
 365 struct OptionsPerl {
 366         bool fold;
 367         bool foldComment;
 368         bool foldCompact;
 369         // Custom folding of POD and packages
 370         bool foldPOD;            // fold.perl.pod
 371         // Enable folding Pod blocks when using the Perl lexer.
 372         bool foldPackage;        // fold.perl.package
 373         // Enable folding packages when using the Perl lexer.
 374
 375         bool foldCommentExplicit;
 376
 377         bool foldAtElse;
 378
 379         OptionsPerl() {
 380                 fold = false;
 381                 foldComment = false;
 382                 foldCompact = true;
 383                 foldPOD = true;
 384                 foldPackage = true;
 385                 foldCommentExplicit = true;
 386                 foldAtElse = false;
 387         }
 388 };
 389
 390 static const char *const perlWordListDesc[] = {
 391         "Keywords",
 392         0
 393 };
 394
// Property table for the Perl lexer: binds each property name to the
// OptionsPerl field it controls and registers the word list descriptions.
struct OptionSetPerl : public OptionSet<OptionsPerl> {
        OptionSetPerl() {
                // generic folding properties (registered without a description)
                DefineProperty("fold", &OptionsPerl::fold);

                DefineProperty("fold.comment", &OptionsPerl::foldComment);

                DefineProperty("fold.compact", &OptionsPerl::foldCompact);

                // Perl-specific folding properties, each with a description string
                DefineProperty("fold.perl.pod", &OptionsPerl::foldPOD,
                        "Set to 0 to disable folding Pod blocks when using the Perl lexer.");

                DefineProperty("fold.perl.package", &OptionsPerl::foldPackage,
                        "Set to 0 to disable folding packages when using the Perl lexer.");

                DefineProperty("fold.perl.comment.explicit", &OptionsPerl::foldCommentExplicit,
                        "Set to 0 to disable explicit folding.");

                DefineProperty("fold.perl.at.else", &OptionsPerl::foldAtElse,
                               "This option enables Perl folding on a \"} else {\" line of an if statement.");

                // single word list: "Keywords" (see perlWordListDesc)
                DefineWordListSets(perlWordListDesc);
        }
};
 418
 419 class LexerPerl : public DefaultLexer {
 420         CharacterSet setWordStart;
 421         CharacterSet setWord;
 422         CharacterSet setSpecialVar;
 423         CharacterSet setControlVar;
 424         WordList keywords;
 425         OptionsPerl options;
 426         OptionSetPerl osPerl;
 427 public:
 428         LexerPerl() :
 429                 DefaultLexer("perl", SCLEX_PERL),
 430                 setWordStart(CharacterSet::setAlpha, "_", 0x80, true),
 431                 setWord(CharacterSet::setAlphaNum, "_", 0x80, true),
 432                 setSpecialVar(CharacterSet::setNone, "\"$;<>&`'+,./\\%:=~!?@[]"),
 433                 setControlVar(CharacterSet::setNone, "ACDEFHILMNOPRSTVWX") {
 434         }
 435         virtual ~LexerPerl() {
 436         }
 437         void SCI_METHOD Release() override {
 438                 delete this;
 439         }
 440         int SCI_METHOD Version() const override {
 441                 return lvRelease5;
 442         }
 443         const char *SCI_METHOD PropertyNames() override {
 444                 return osPerl.PropertyNames();
 445         }
 446         int SCI_METHOD PropertyType(const char *name) override {
 447                 return osPerl.PropertyType(name);
 448         }
 449         const char *SCI_METHOD DescribeProperty(const char *name) override {
 450                 return osPerl.DescribeProperty(name);
 451         }
 452         Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
 453         const char * SCI_METHOD PropertyGet(const char *key) override {
 454                 return osPerl.PropertyGet(key);
 455         }
 456         const char *SCI_METHOD DescribeWordListSets() override {
 457                 return osPerl.DescribeWordListSets();
 458         }
 459         Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
 460         void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
 461         void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
 462
 463         void *SCI_METHOD PrivateCall(int, void *) override {
 464                 return 0;
 465         }
 466
 467         static ILexer5 *LexerFactoryPerl() {
 468                 return new LexerPerl();
 469         }
 470         int InputSymbolScan(StyleContext &sc);
 471         void InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern=false);
 472 };
 473
 474 Sci_Position SCI_METHOD LexerPerl::PropertySet(const char *key, const char *val) {
 475         if (osPerl.PropertySet(&options, key, val)) {
 476                 return 0;
 477         }
 478         return -1;
 479 }
 480
 481 Sci_Position SCI_METHOD LexerPerl::WordListSet(int n, const char *wl) {
 482         WordList *wordListN = 0;
 483         switch (n) {
 484         case 0:
 485                 wordListN = &keywords;
 486                 break;
 487         }
 488         Sci_Position firstModification = -1;
 489         if (wordListN) {
 490                 WordList wlNew;
 491                 wlNew.Set(wl);
 492                 if (*wordListN != wlNew) {
 493                         wordListN->Set(wl);
 494                         firstModification = 0;
 495                 }
 496         }
 497         return firstModification;
 498 }
 499
 500 int LexerPerl::InputSymbolScan(StyleContext &sc) {
 501         // forward scan for matching > on same line; file handles
 502         int c, sLen = 0;
 503         while ((c = sc.GetRelativeCharacter(++sLen)) != 0) {
 504                 if (c == '\r' || c == '\n') {
 505                         return 0;
 506                 } else if (c == '>') {
 507                         if (sc.Match("<=>"))    // '<=>' case
 508                                 return 0;
 509                         return sLen;
 510                 }
 511         }
 512         return 0;
 513 }
 514
// Style the next maxSeg characters of an interpolating construct.
//   sc        - style context positioned on the first character of the segment
//   maxSeg    - number of characters in the segment; counted down as they are consumed
//   isPattern - when true, "$(", "$)", "$|", "@+" and "@-" are not treated as variables
// Variable references are given the interpolated variant of the current style
// (sc.state + INTERPOLATE_SHIFT); all other characters use the parent style.
// On return the context is back in the parent (non-interpolated) style.
void LexerPerl::InterpolateSegment(StyleContext &sc, int maxSeg, bool isPattern) {
        // interpolate a segment (with no active backslashes or delimiters within)
        // switch in or out of an interpolation style or continue current style
        // commit variable patterns if found, trim segment, repeat until done
        while (maxSeg > 0) {
                bool isVar = false;     // true once a complete variable pattern is recognised
                int sLen = 0;           // length of the candidate variable, in characters
                if ((maxSeg > 1) && (sc.ch == '$' || sc.ch == '@')) {
                        // $#[$]*word [$@][$]*word (where word or {word} is always present)
                        bool braces = false;
                        sLen = 1;
                        if (sc.ch == '$' && sc.chNext == '#') { // starts with $#
                                sLen++;
                        }
                        while ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '$'))       // >0 $ dereference within
                                sLen++;
                        if ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '{')) {        // { start for {word}
                                sLen++;
                                braces = true;
                        }
                        if (maxSeg > sLen) {
                                int c = sc.GetRelativeCharacter(sLen);
                                if (setWordStart.Contains(c)) { // word (various)
                                        sLen++;
                                        isVar = true;
                                        while (maxSeg > sLen) {
                                                if (!setWord.Contains(sc.GetRelativeCharacter(sLen)))
                                                        break;
                                                sLen++;
                                        }
                                // sLen == 2 here means exactly one sigil followed by '{'
                                } else if (braces && IsADigit(c) && (sLen == 2)) {      // digit for ${digit}
                                        sLen++;
                                        isVar = true;
                                }
                        }
                        if (braces) {
                                // an opening brace without its closing brace is not a variable
                                if ((maxSeg > sLen) && (sc.GetRelativeCharacter(sLen) == '}')) {        // } end for {word}
                                        sLen++;
                                } else
                                        isVar = false;
                        }
                }
                // second chance: punctuation and numeric variables, tested only
                // when the word-style pattern above did not match
                if (!isVar && (maxSeg > 1)) {   // $- or @-specific variable patterns
                        int c = sc.chNext;
                        if (sc.ch == '$') {
                                sLen = 1;
                                if (IsADigit(c)) {      // $[0-9] and slurp trailing digits
                                        sLen++;
                                        isVar = true;
                                        while ((maxSeg > sLen) && IsADigit(sc.GetRelativeCharacter(sLen)))
                                                sLen++;
                                } else if (setSpecialVar.Contains(c)) { // $ special variables
                                        sLen++;
                                        isVar = true;
                                } else if (!isPattern && ((c == '(') || (c == ')') || (c == '|'))) {    // $ additional
                                        sLen++;
                                        isVar = true;
                                } else if (c == '^') {  // $^A control-char style
                                        sLen++;
                                        if ((maxSeg > sLen) && setControlVar.Contains(sc.GetRelativeCharacter(sLen))) {
                                                sLen++;
                                                isVar = true;
                                        }
                                }
                        } else if (sc.ch == '@') {
                                sLen = 1;
                                if (!isPattern && ((c == '+') || (c == '-'))) { // @ specials non-pattern
                                        sLen++;
                                        isVar = true;
                                }
                        }
                }
                if (isVar) {    // commit as interpolated variable or normal character
                        // enter the interpolated style unless already in one, then consume the variable
                        if (sc.state < SCE_PL_STRING_VAR)
                                sc.SetState(sc.state + INTERPOLATE_SHIFT);
                        sc.Forward(sLen);
                        maxSeg -= sLen;
                } else {
                        // ordinary character: drop back to the parent style and consume one character
                        if (sc.state >= SCE_PL_STRING_VAR)
                                sc.SetState(sc.state - INTERPOLATE_SHIFT);
                        sc.Forward();
                        maxSeg--;
                }
        }
        // never leave the segment in an interpolated style
        if (sc.state >= SCE_PL_STRING_VAR)
                sc.SetState(sc.state - INTERPOLATE_SHIFT);
}
 602
 603 void SCI_METHOD LexerPerl::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
 604         LexAccessor styler(pAccess);
 605
 606         // keywords that forces /PATTERN/ at all times; should track vim's behaviour
 607         WordList reWords;
 608         reWords.Set("elsif if split while");
 609
 610         // charset classes
 611         CharacterSet setSingleCharOp(CharacterSet::setNone, "rwxoRWXOezsfdlpSbctugkTBMAC");
 612         // lexing of "%*</" operators is non-trivial; these are missing in the set below
 613         CharacterSet setPerlOperator(CharacterSet::setNone, "^&\\()-+=|{}[]:;>,?!.~");
 614         CharacterSet setQDelim(CharacterSet::setNone, "qrwx");
 615         CharacterSet setModifiers(CharacterSet::setAlpha);
 616         CharacterSet setPreferRE(CharacterSet::setNone, "*/<%");
 617         // setArray and setHash also accepts chars for special vars like $_,
 618         // which are then truncated when the next char does not match setVar
 619         CharacterSet setVar(CharacterSet::setAlphaNum, "#$_'", 0x80, true);
 620         CharacterSet setArray(CharacterSet::setAlpha, "#$_+-", 0x80, true);
 621         CharacterSet setHash(CharacterSet::setAlpha, "#$_!^+-", 0x80, true);
 622         CharacterSet &setPOD = setModifiers;
 623         CharacterSet setNonHereDoc(CharacterSet::setDigits, "=$@");
 624         CharacterSet setHereDocDelim(CharacterSet::setAlphaNum, "_");
 625         CharacterSet setSubPrototype(CharacterSet::setNone, "\\[$@%&*+];_ \t");
 626         CharacterSet setRepetition(CharacterSet::setDigits, ")\"'");
 627         // for format identifiers
 628         CharacterSet setFormatStart(CharacterSet::setAlpha, "_=");
 629         CharacterSet &setFormat = setHereDocDelim;
 630
 631         // Lexer for perl often has to backtrack to start of current style to determine
 632         // which characters are being used as quotes, how deeply nested is the
 633         // start position and what the termination string is for HERE documents.
 634
 635         class HereDocCls {      // Class to manage HERE doc sequence
 636         public:
 637                 int State;
 638                 // 0: '<<' encountered
 639                 // 1: collect the delimiter
 640                 // 2: here doc text (lines after the delimiter)
 641                 int Quote;              // the char after '<<'
 642                 bool Quoted;            // true if Quote in ('\'','"','`')
 643                 bool StripIndent;       // true if '<<~' requested to strip leading whitespace
 644                 int DelimiterLength;    // strlen(Delimiter)
 645                 char Delimiter[HERE_DELIM_MAX]; // the Delimiter
 646                 HereDocCls() {
 647                         State = 0;
 648                         Quote = 0;
 649                         Quoted = false;
 650                         StripIndent = false;
 651                         DelimiterLength = 0;
 652                         Delimiter[0] = '\0';
 653                 }
 654                 void Append(int ch) {
 655                         Delimiter[DelimiterLength++] = static_cast<char>(ch);
 656                         Delimiter[DelimiterLength] = '\0';
 657                 }
 658                 ~HereDocCls() {
 659                 }
 660         };
 661         HereDocCls HereDoc;             // TODO: FIFO for stacked here-docs
 662
 663         class QuoteCls {        // Class to manage quote pairs
 664         public:
 665                 int Rep;
 666                 int Count;
 667                 int Up, Down;
 668                 QuoteCls() {
 669                         New(1);
 670                 }
 671                 void New(int r = 1) {
 672                         Rep   = r;
 673                         Count = 0;
 674                         Up    = '\0';
 675                         Down  = '\0';
 676                 }
 677                 void Open(int u) {
 678                         Count++;
 679                         Up    = u;
 680                         Down  = opposite(Up);
 681                 }
 682         };
 683         QuoteCls Quote;
 684
 685         // additional state for number lexing
 686         int numState = PERLNUM_DECIMAL;
 687         int dotCount = 0;
 688
 689         Sci_PositionU endPos = startPos + length;
 690
 691         // Backtrack to beginning of style if required...
 692         // If in a long distance lexical state, backtrack to find quote characters.
 693         // Includes strings (may be multi-line), numbers (additional state), format
 694         // bodies, as well as POD sections.
 695         if (initStyle == SCE_PL_HERE_Q
 696             || initStyle == SCE_PL_HERE_QQ
 697             || initStyle == SCE_PL_HERE_QX
 698             || initStyle == SCE_PL_FORMAT
 699             || initStyle == SCE_PL_HERE_QQ_VAR
 700             || initStyle == SCE_PL_HERE_QX_VAR
 701            ) {
 702                 // backtrack through multiple styles to reach the delimiter start
 703                 int delim = (initStyle == SCE_PL_FORMAT) ? SCE_PL_FORMAT_IDENT:SCE_PL_HERE_DELIM;
 704                 while ((startPos > 1) && (styler.StyleAt(startPos) != delim)) {
 705                         startPos--;
 706                 }
 707                 startPos = styler.LineStart(styler.GetLine(startPos));
 708                 initStyle = styler.StyleAt(startPos - 1);
 709         }
 710         if (initStyle == SCE_PL_STRING
 711             || initStyle == SCE_PL_STRING_QQ
 712             || initStyle == SCE_PL_BACKTICKS
 713             || initStyle == SCE_PL_STRING_QX
 714             || initStyle == SCE_PL_REGEX
 715             || initStyle == SCE_PL_STRING_QR
 716             || initStyle == SCE_PL_REGSUBST
 717             || initStyle == SCE_PL_STRING_VAR
 718             || initStyle == SCE_PL_STRING_QQ_VAR
 719             || initStyle == SCE_PL_BACKTICKS_VAR
 720             || initStyle == SCE_PL_STRING_QX_VAR
 721             || initStyle == SCE_PL_REGEX_VAR
 722             || initStyle == SCE_PL_STRING_QR_VAR
 723             || initStyle == SCE_PL_REGSUBST_VAR
 724            ) {
 725                 // for interpolation, must backtrack through a mix of two different styles
 726                 int otherStyle = (initStyle >= SCE_PL_STRING_VAR) ?
 727                         initStyle - INTERPOLATE_SHIFT : initStyle + INTERPOLATE_SHIFT;
 728                 while (startPos > 1) {
 729                         int st = styler.StyleAt(startPos - 1);
 730                         if ((st != initStyle) && (st != otherStyle))
 731                                 break;
 732                         startPos--;
 733                 }
 734                 initStyle = SCE_PL_DEFAULT;
 735         } else if (initStyle == SCE_PL_STRING_Q
 736                 || initStyle == SCE_PL_STRING_QW
 737                 || initStyle == SCE_PL_XLAT
 738                 || initStyle == SCE_PL_CHARACTER
 739                 || initStyle == SCE_PL_NUMBER
 740                 || initStyle == SCE_PL_IDENTIFIER
 741                 || initStyle == SCE_PL_ERROR
 742                 || initStyle == SCE_PL_SUB_PROTOTYPE
 743            ) {
 744                 while ((startPos > 1) && (styler.StyleAt(startPos - 1) == initStyle)) {
 745                         startPos--;
 746                 }
 747                 initStyle = SCE_PL_DEFAULT;
 748         } else if (initStyle == SCE_PL_POD
 749                 || initStyle == SCE_PL_POD_VERB
 750                   ) {
 751                 // POD backtracking finds preceding blank lines and goes back past them
 752                 Sci_Position ln = styler.GetLine(startPos);
 753                 if (ln > 0) {
 754                         initStyle = styler.StyleAt(styler.LineStart(--ln));
 755                         if (initStyle == SCE_PL_POD || initStyle == SCE_PL_POD_VERB) {
 756                                 while (ln > 0 && styler.GetLineState(ln) == SCE_PL_DEFAULT)
 757                                         ln--;
 758                         }
 759                         startPos = styler.LineStart(++ln);
 760                         initStyle = styler.StyleAt(startPos - 1);
 761                 } else {
 762                         startPos = 0;
 763                         initStyle = SCE_PL_DEFAULT;
 764                 }
 765         }
 766
 767         // backFlag, backPos are additional state to aid identifier corner cases.
 768         // Look backwards past whitespace and comments in order to detect either
 769         // operator or keyword. Later updated as we go along.
 770         int backFlag = BACK_NONE;
 771         Sci_PositionU backPos = startPos;
 772         if (backPos > 0) {
 773                 backPos--;
 774                 skipWhitespaceComment(styler, backPos);
 775                 if (styler.StyleAt(backPos) == SCE_PL_OPERATOR)
 776                         backFlag = BACK_OPERATOR;
 777                 else if (styler.StyleAt(backPos) == SCE_PL_WORD)
 778                         backFlag = BACK_KEYWORD;
 779                 backPos++;
 780         }
 781
 782         StyleContext sc(startPos, endPos - startPos, initStyle, styler);
 783
 784         for (; sc.More(); sc.Forward()) {
 785
 786                 // Determine if the current state should terminate.
 787                 switch (sc.state) {
 788                 case SCE_PL_OPERATOR:
 789                         sc.SetState(SCE_PL_DEFAULT);
 790                         backFlag = BACK_OPERATOR;
 791                         backPos = sc.currentPos;
 792                         break;
 793                 case SCE_PL_IDENTIFIER:         // identifier, bareword, inputsymbol
 794                         if ((!setWord.Contains(sc.ch) && sc.ch != '\'')
 795                                 || sc.Match('.', '.')
 796                                 || sc.chPrev == '>') {  // end of inputsymbol
 797                                 sc.SetState(SCE_PL_DEFAULT);
 798                         }
 799                         break;
 800                 case SCE_PL_WORD:               // keyword, plus special cases
 801                         if (!setWord.Contains(sc.ch)) {
 802                                 char s[100];
 803                                 sc.GetCurrent(s, sizeof(s));
 804                                 if ((strcmp(s, "__DATA__") == 0) || (strcmp(s, "__END__") == 0)) {
 805                                         sc.ChangeState(SCE_PL_DATASECTION);
 806                                 } else {
 807                                         if ((strcmp(s, "format") == 0)) {
 808                                                 sc.SetState(SCE_PL_FORMAT_IDENT);
 809                                                 HereDoc.State = 0;
 810                                         } else {
 811                                                 sc.SetState(SCE_PL_DEFAULT);
 812                                         }
 813                                         backFlag = BACK_KEYWORD;
 814                                         backPos = sc.currentPos;
 815                                 }
 816                         }
 817                         break;
 818                 case SCE_PL_SCALAR:
 819                 case SCE_PL_ARRAY:
 820                 case SCE_PL_HASH:
 821                 case SCE_PL_SYMBOLTABLE:
 822                         if (sc.Match(':', ':')) {       // skip ::
 823                                 sc.Forward();
 824                         } else if (!setVar.Contains(sc.ch)) {
 825                                 if (sc.LengthCurrent() == 1) {
 826                                         // Special variable: $(, $_ etc.
 827                                         sc.Forward();
 828                                 }
 829                                 sc.SetState(SCE_PL_DEFAULT);
 830                         }
 831                         break;
 832                 case SCE_PL_NUMBER:
 833                         // if no early break, number style is terminated at "(go through)"
 834                         if (sc.ch == '.') {
 835                                 if (sc.chNext == '.') {
 836                                         // double dot is always an operator (go through)
 837                                 } else if (numState <= PERLNUM_FLOAT_EXP) {
 838                                         // non-decimal number or float exponent, consume next dot
 839                                         sc.SetState(SCE_PL_OPERATOR);
 840                                         break;
 841                                 } else {        // decimal or vectors allows dots
 842                                         dotCount++;
 843                                         if (numState == PERLNUM_DECIMAL) {
 844                                                 if (dotCount <= 1)      // number with one dot in it
 845                                                         break;
 846                                                 if (IsADigit(sc.chNext)) {      // really a vector
 847                                                         numState = PERLNUM_VECTOR;
 848                                                         break;
 849                                                 }
 850                                                 // number then dot (go through)
 851                                         } else if (numState == PERLNUM_HEX) {
 852                                                 if (dotCount <= 1 && IsADigit(sc.chNext, 16)) {
 853                                                         break;  // hex with one dot is a hex float
 854                                                 } else {
 855                                                         sc.SetState(SCE_PL_OPERATOR);
 856                                                         break;
 857                                                 }
 858                                                 // hex then dot (go through)
 859                                         } else if (IsADigit(sc.chNext)) // vectors
 860                                                 break;
 861                                         // vector then dot (go through)
 862                                 }
 863                         } else if (sc.ch == '_') {
 864                                 // permissive underscoring for number and vector literals
 865                                 break;
 866                         } else if (numState == PERLNUM_DECIMAL) {
 867                                 if (sc.ch == 'E' || sc.ch == 'e') {     // exponent, sign
 868                                         numState = PERLNUM_FLOAT_EXP;
 869                                         if (sc.chNext == '+' || sc.chNext == '-') {
 870                                                 sc.Forward();
 871                                         }
 872                                         break;
 873                                 } else if (IsADigit(sc.ch))
 874                                         break;
 875                                 // number then word (go through)
 876                         } else if (numState == PERLNUM_HEX) {
 877                                 if (sc.ch == 'P' || sc.ch == 'p') {     // hex float exponent, sign
 878                                         numState = PERLNUM_FLOAT_EXP;
 879                                         if (sc.chNext == '+' || sc.chNext == '-') {
 880                                                 sc.Forward();
 881                                         }
 882                                         break;
 883                                 } else if (IsADigit(sc.ch, 16))
 884                                         break;
 885                                 // hex or hex float then word (go through)
 886                         } else if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) {
 887                                 if (IsADigit(sc.ch))    // vector
 888                                         break;
 889                                 if (setWord.Contains(sc.ch) && dotCount == 0) { // change to word
 890                                         sc.ChangeState(SCE_PL_IDENTIFIER);
 891                                         break;
 892                                 }
 893                                 // vector then word (go through)
 894                         } else if (IsADigit(sc.ch)) {
 895                                 if (numState == PERLNUM_FLOAT_EXP) {
 896                                         break;
 897                                 } else if (numState == PERLNUM_OCTAL) {
 898                                         if (sc.ch <= '7') break;
 899                                 } else if (numState == PERLNUM_BINARY) {
 900                                         if (sc.ch <= '1') break;
 901                                 }
 902                                 // mark invalid octal, binary numbers (go through)
 903                                 numState = PERLNUM_BAD;
 904                                 break;
 905                         }
 906                         // complete current number or vector
 907                         sc.ChangeState(actualNumStyle(numState));
 908                         sc.SetState(SCE_PL_DEFAULT);
 909                         break;
 910                 case SCE_PL_COMMENTLINE:
 911                         if (sc.atLineStart) {
 912                                 sc.SetState(SCE_PL_DEFAULT);
 913                         }
 914                         break;
 915                 case SCE_PL_HERE_DELIM:
 916                         if (HereDoc.State == 0) { // '<<' encountered
 917                                 int delim_ch = sc.chNext;
 918                                 Sci_Position ws_skip = 0;
 919                                 HereDoc.State = 1;      // pre-init HERE doc class
 920                                 HereDoc.Quote = sc.chNext;
 921                                 HereDoc.Quoted = false;
 922                                 HereDoc.StripIndent = false;
 923                                 HereDoc.DelimiterLength = 0;
 924                                 HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
 925                                 if (delim_ch == '~') { // was actually '<<~'
 926                                         sc.Forward();
 927                                         HereDoc.StripIndent = true;
 928                                         HereDoc.Quote = delim_ch = sc.chNext;
 929                                 }
 930                                 if (IsASpaceOrTab(delim_ch)) {
 931                                         // skip whitespace; legal only for quoted delimiters
 932                                         Sci_PositionU i = sc.currentPos + 1;
 933                                         while ((i < endPos) && IsASpaceOrTab(delim_ch)) {
 934                                                 i++;
 935                                                 delim_ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
 936                                         }
 937                                         ws_skip = i - sc.currentPos - 1;
 938                                 }
 939                                 if (delim_ch == '\'' || delim_ch == '"' || delim_ch == '`') {
 940                                         // a quoted here-doc delimiter; skip any whitespace
 941                                         sc.Forward(ws_skip + 1);
 942                                         HereDoc.Quote = delim_ch;
 943                                         HereDoc.Quoted = true;
 944                                 } else if ((ws_skip == 0 && setNonHereDoc.Contains(sc.chNext))
 945                                         || ws_skip > 0) {
 946                                         // left shift << or <<= operator cases
 947                                         // restore position if operator
 948                                         sc.ChangeState(SCE_PL_OPERATOR);
 949                                         sc.ForwardSetState(SCE_PL_DEFAULT);
 950                                         backFlag = BACK_OPERATOR;
 951                                         backPos = sc.currentPos;
 952                                         HereDoc.State = 0;
 953                                 } else {
 954                                         // specially handle initial '\' for identifier
 955                                         if (ws_skip == 0 && HereDoc.Quote == '\\')
 956                                                 sc.Forward();
 957                                         // an unquoted here-doc delimiter, no special handling
 958                                         // (cannot be prefixed by spaces/tabs), or
 959                                         // symbols terminates; deprecated zero-length delimiter
 960                                 }
 961                         } else if (HereDoc.State == 1) { // collect the delimiter
 962                                 backFlag = BACK_NONE;
 963                                 if (HereDoc.Quoted) { // a quoted here-doc delimiter
 964                                         if (sc.ch == HereDoc.Quote) { // closing quote => end of delimiter
 965                                                 sc.ForwardSetState(SCE_PL_DEFAULT);
 966                                         } else if (!sc.atLineEnd) {
 967                                                 if (sc.Match('\\', static_cast<char>(HereDoc.Quote))) { // escaped quote
 968                                                         sc.Forward();
 969                                                 }
 970                                                 if (sc.ch != '\r') {    // skip CR if CRLF
 971                                                         int i = 0;                      // else append char, possibly an extended char
 972                                                         while (i < sc.width) {
 973                                                                 HereDoc.Append(static_cast<unsigned char>(styler.SafeGetCharAt(sc.currentPos + i)));
 974                                                                 i++;
 975                                                         }
 976                                                 }
 977                                         }
 978                                 } else { // an unquoted here-doc delimiter, no extended charsets
 979                                         if (setHereDocDelim.Contains(sc.ch)) {
 980                                                 HereDoc.Append(sc.ch);
 981                                         } else {
 982                                                 sc.SetState(SCE_PL_DEFAULT);
 983                                         }
 984                                 }
 985                                 if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) {
 986                                         sc.SetState(SCE_PL_ERROR);
 987                                         HereDoc.State = 0;
 988                                 }
 989                         }
 990                         break;
 991                 case SCE_PL_HERE_Q:
 992                 case SCE_PL_HERE_QQ:
 993                 case SCE_PL_HERE_QX:
 994                         // also implies HereDoc.State == 2
 995                         sc.Complete();
 996                         if (HereDoc.StripIndent) {
 997                                 // skip whitespace
 998                                 while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
 999                                         sc.Forward();
1000                         }
1001                         if (HereDoc.DelimiterLength == 0 || sc.Match(HereDoc.Delimiter)) {
1002                                 int c = sc.GetRelative(HereDoc.DelimiterLength);
1003                                 if (c == '\r' || c == '\n') {   // peek first, do not consume match
1004                                         sc.ForwardBytes(HereDoc.DelimiterLength);
1005                                         sc.SetState(SCE_PL_DEFAULT);
1006                                         backFlag = BACK_NONE;
1007                                         HereDoc.State = 0;
1008                                         if (!sc.atLineEnd)
1009                                                 sc.Forward();
1010                                         break;
1011                                 }
1012                         }
1013                         if (sc.state == SCE_PL_HERE_Q) {        // \EOF and 'EOF' non-interpolated
1014                                 while (!sc.atLineEnd)
1015                                         sc.Forward();
1016                                 break;
1017                         }
1018                         while (!sc.atLineEnd) {         // "EOF" and `EOF` interpolated
1019                                 int c, sLen = 0, endType = 0;
1020                                 while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
1021                                         // scan to break string into segments
1022                                         if (c == '\\') {
1023                                                 endType = 1; break;
1024                                         } else if (c == '\r' || c == '\n') {
1025                                                 endType = 2; break;
1026                                         }
1027                                         sLen++;
1028                                 }
1029                                 if (sLen > 0)   // process non-empty segments
1030                                         InterpolateSegment(sc, sLen);
1031                                 if (endType == 1) {
1032                                         sc.Forward();
1033                                         // \ at end-of-line does not appear to have any effect, skip
1034                                         if (sc.ch != '\r' && sc.ch != '\n')
1035                                                 sc.Forward();
1036                                 } else if (endType == 2) {
1037                                         if (!sc.atLineEnd)
1038                                                 sc.Forward();
1039                                 }
1040                         }
1041                         break;
1042                 case SCE_PL_POD:
1043                 case SCE_PL_POD_VERB: {
1044                                 Sci_PositionU fw = sc.currentPos;
1045                                 Sci_Position ln = styler.GetLine(fw);
1046                                 if (sc.atLineStart && sc.Match("=cut")) {       // end of POD
1047                                         sc.SetState(SCE_PL_POD);
1048                                         sc.Forward(4);
1049                                         sc.SetState(SCE_PL_DEFAULT);
1050                                         styler.SetLineState(ln, SCE_PL_POD);
1051                                         break;
1052                                 }
1053                                 int pod = podLineScan(styler, fw, endPos);      // classify POD line
1054                                 styler.SetLineState(ln, pod);
1055                                 if (pod == SCE_PL_DEFAULT) {
1056                                         if (sc.state == SCE_PL_POD_VERB) {
1057                                                 Sci_PositionU fw2 = fw;
1058                                                 while (fw2 < (endPos - 1) && pod == SCE_PL_DEFAULT) {
1059                                                         fw = fw2++;     // penultimate line (last blank line)
1060                                                         pod = podLineScan(styler, fw2, endPos);
1061                                                         styler.SetLineState(styler.GetLine(fw2), pod);
1062                                                 }
1063                                                 if (pod == SCE_PL_POD) {        // truncate verbatim POD early
1064                                                         sc.SetState(SCE_PL_POD);
1065                                                 } else
1066                                                         fw = fw2;
1067                                         }
1068                                 } else {
1069                                         if (pod == SCE_PL_POD_VERB      // still part of current paragraph
1070                                                 && (styler.GetLineState(ln - 1) == SCE_PL_POD)) {
1071                                                 pod = SCE_PL_POD;
1072                                                 styler.SetLineState(ln, pod);
1073                                         } else if (pod == SCE_PL_POD
1074                                                 && (styler.GetLineState(ln - 1) == SCE_PL_POD_VERB)) {
1075                                                 pod = SCE_PL_POD_VERB;
1076                                                 styler.SetLineState(ln, pod);
1077                                         }
1078                                         sc.SetState(pod);
1079                                 }
1080                                 sc.ForwardBytes(fw - sc.currentPos);    // commit style
1081                         }
1082                         break;
1083                 case SCE_PL_REGEX:
1084                 case SCE_PL_STRING_QR:
1085                         if (Quote.Rep <= 0) {
1086                                 if (!setModifiers.Contains(sc.ch))
1087                                         sc.SetState(SCE_PL_DEFAULT);
1088                         } else if (!Quote.Up && !IsASpace(sc.ch)) {
1089                                 Quote.Open(sc.ch);
1090                         } else {
1091                                 int c, sLen = 0, endType = 0;
1092                                 while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
1093                                         // scan to break string into segments
1094                                         if (IsASpace(c)) {
1095                                                 break;
1096                                         } else if (c == '\\' && Quote.Up != '\\') {
1097                                                 endType = 1; break;
1098                                         } else if (c == Quote.Down) {
1099                                                 Quote.Count--;
1100                                                 if (Quote.Count == 0) {
1101                                                         Quote.Rep--;
1102                                                         break;
1103                                                 }
1104                                         } else if (c == Quote.Up)
1105                                                 Quote.Count++;
1106                                         sLen++;
1107                                 }
1108                                 if (sLen > 0) { // process non-empty segments
1109                                         if (Quote.Up != '\'') {
1110                                                 InterpolateSegment(sc, sLen, true);
1111                                         } else          // non-interpolated path
1112                                                 sc.Forward(sLen);
1113                                 }
1114                                 if (endType == 1)
1115                                         sc.Forward();
1116                         }
1117                         break;
1118                 case SCE_PL_REGSUBST:
1119                 case SCE_PL_XLAT:
1120                         if (Quote.Rep <= 0) {
1121                                 if (!setModifiers.Contains(sc.ch))
1122                                         sc.SetState(SCE_PL_DEFAULT);
1123                         } else if (!Quote.Up && !IsASpace(sc.ch)) {
1124                                 Quote.Open(sc.ch);
1125                         } else {
1126                                 int c, sLen = 0, endType = 0;
1127                                 bool isPattern = (Quote.Rep == 2);
1128                                 while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
1129                                         // scan to break string into segments
1130                                         if (c == '\\' && Quote.Up != '\\') {
1131                                                 endType = 2; break;
1132                                         } else if (Quote.Count == 0 && Quote.Rep == 1) {
1133                                                 // We matched something like s(...) or tr{...}, Perl 5.10
1134                                                 // appears to allow almost any character for use as the
1135                                                 // next delimiters. Whitespace and comments are accepted in
1136                                                 // between, but we'll limit to whitespace here.
1137                                                 // For '#', if no whitespace in between, it's a delimiter.
1138                                                 if (IsASpace(c)) {
1139                                                         // Keep going
1140                                                 } else if (c == '#' && IsASpaceOrTab(sc.GetRelativeCharacter(sLen - 1))) {
1141                                                         endType = 3;
1142                                                 } else
1143                                                         Quote.Open(c);
1144                                                 break;
1145                                         } else if (c == Quote.Down) {
1146                                                 Quote.Count--;
1147                                                 if (Quote.Count == 0) {
1148                                                         Quote.Rep--;
1149                                                         endType = 1;
1150                                                 }
1151                                                 if (Quote.Up == Quote.Down)
1152                                                         Quote.Count++;
1153                                                 if (endType == 1)
1154                                                         break;
1155                                         } else if (c == Quote.Up) {
1156                                                 Quote.Count++;
1157                                         } else if (IsASpace(c))
1158                                                 break;
1159                                         sLen++;
1160                                 }
1161                                 if (sLen > 0) { // process non-empty segments
1162                                         if (sc.state == SCE_PL_REGSUBST && Quote.Up != '\'') {
1163                                                 InterpolateSegment(sc, sLen, isPattern);
1164                                         } else          // non-interpolated path
1165                                                 sc.Forward(sLen);
1166                                 }
1167                                 if (endType == 2) {
1168                                         sc.Forward();
1169                                 } else if (endType == 3)
1170                                         sc.SetState(SCE_PL_DEFAULT);
1171                         }
1172                         break;
1173                 case SCE_PL_STRING_Q:
1174                 case SCE_PL_STRING_QQ:
1175                 case SCE_PL_STRING_QX:
1176                 case SCE_PL_STRING_QW:
1177                 case SCE_PL_STRING:
1178                 case SCE_PL_CHARACTER:
1179                 case SCE_PL_BACKTICKS:
1180                         if (!Quote.Down && !IsASpace(sc.ch)) {
1181                                 Quote.Open(sc.ch);
1182                         } else {
1183                                 int c, sLen = 0, endType = 0;
1184                                 while ((c = sc.GetRelativeCharacter(sLen)) != 0) {
1185                                         // scan to break string into segments
1186                                         if (IsASpace(c)) {
1187                                                 break;
1188                                         } else if (c == '\\' && Quote.Up != '\\') {
1189                                                 endType = 2; break;
1190                                         } else if (c == Quote.Down) {
1191                                                 Quote.Count--;
1192                                                 if (Quote.Count == 0) {
1193                                                         endType = 3; break;
1194                                                 }
1195                                         } else if (c == Quote.Up)
1196                                                 Quote.Count++;
1197                                         sLen++;
1198                                 }
1199                                 if (sLen > 0) { // process non-empty segments
1200                                         switch (sc.state) {
1201                                         case SCE_PL_STRING:
1202                                         case SCE_PL_STRING_QQ:
1203                                         case SCE_PL_BACKTICKS:
1204                                                 InterpolateSegment(sc, sLen);
1205                                                 break;
1206                                         case SCE_PL_STRING_QX:
1207                                                 if (Quote.Up != '\'') {
1208                                                         InterpolateSegment(sc, sLen);
1209                                                         break;
1210                                                 }
1211                                                 // (continued for ' delim)
1212                                                 // Falls through.
1213                                         default:        // non-interpolated path
1214                                                 sc.Forward(sLen);
1215                                         }
1216                                 }
1217                                 if (endType == 2) {
1218                                         sc.Forward();
1219                                 } else if (endType == 3)
1220                                         sc.ForwardSetState(SCE_PL_DEFAULT);
1221                         }
1222                         break;
1223                 case SCE_PL_SUB_PROTOTYPE: {
1224                                 int i = 0;
1225                                 // forward scan; must all be valid proto characters
1226                                 while (setSubPrototype.Contains(sc.GetRelative(i)))
1227                                         i++;
1228                                 if (sc.GetRelative(i) == ')') { // valid sub prototype
1229                                         sc.ForwardBytes(i);
1230                                         sc.ForwardSetState(SCE_PL_DEFAULT);
1231                                 } else {
1232                                         // abandon prototype, restart from '('
1233                                         sc.ChangeState(SCE_PL_OPERATOR);
1234                                         sc.SetState(SCE_PL_DEFAULT);
1235                                 }
1236                         }
1237                         break;
1238                 case SCE_PL_FORMAT: {
1239                                 sc.Complete();
1240                                 if (sc.Match('.')) {
1241                                         sc.Forward();
1242                                         if (sc.atLineEnd || ((sc.ch == '\r' && sc.chNext == '\n')))
1243                                                 sc.SetState(SCE_PL_DEFAULT);
1244                                 }
1245                                 while (!sc.atLineEnd)
1246                                         sc.Forward();
1247                         }
1248                         break;
1249                 case SCE_PL_ERROR:
1250                         break;
1251                 }
1252                 // Needed for specific continuation styles (one follows the other)
1253                 switch (sc.state) {
1254                         // continued from SCE_PL_WORD
1255                 case SCE_PL_FORMAT_IDENT:
1256                         // occupies HereDoc state 3 to avoid clashing with HERE docs
1257                         if (IsASpaceOrTab(sc.ch)) {             // skip whitespace
1258                                 sc.ChangeState(SCE_PL_DEFAULT);
1259                                 while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
1260                                         sc.Forward();
1261                                 sc.SetState(SCE_PL_FORMAT_IDENT);
1262                         }
1263                         if (setFormatStart.Contains(sc.ch)) {   // identifier or '='
1264                                 if (sc.ch != '=') {
1265                                         do {
1266                                                 sc.Forward();
1267                                         } while (setFormat.Contains(sc.ch));
1268                                 }
1269                                 while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd)
1270                                         sc.Forward();
1271                                 if (sc.ch == '=') {
1272                                         sc.ForwardSetState(SCE_PL_DEFAULT);
1273                                         HereDoc.State = 3;
1274                                 } else {
1275                                         // invalid identifier; inexact fallback, but hey
1276                                         sc.ChangeState(SCE_PL_IDENTIFIER);
1277                                         sc.SetState(SCE_PL_DEFAULT);
1278                                 }
1279                         } else {
1280                                 sc.ChangeState(SCE_PL_DEFAULT); // invalid identifier
1281                         }
1282                         backFlag = BACK_NONE;
1283                         break;
1284                 }
1285
1286                 // Must check end of HereDoc states here before default state is handled
1287                 if (HereDoc.State == 1 && sc.atLineEnd) {
1288                         // Start of here-doc (the line after the here-doc delimiter):
1289                         // Lexically, the here-doc starts from the next line after the <<, but the
1290                         // first line of here-doc seems to follow the style of the last EOL sequence
1291                         int st_new = SCE_PL_HERE_QQ;
1292                         HereDoc.State = 2;
1293                         if (HereDoc.Quoted) {
1294                                 if (sc.state == SCE_PL_HERE_DELIM) {
1295                                         // Missing quote at end of string! We are stricter than perl.
1296                                         // Colour here-doc anyway while marking this bit as an error.
1297                                         sc.ChangeState(SCE_PL_ERROR);
1298                                 }
1299                                 switch (HereDoc.Quote) {
1300                                 case '\'':
1301                                         st_new = SCE_PL_HERE_Q;
1302                                         break;
1303                                 case '"' :
1304                                         st_new = SCE_PL_HERE_QQ;
1305                                         break;
1306                                 case '`' :
1307                                         st_new = SCE_PL_HERE_QX;
1308                                         break;
1309                                 }
1310                         } else {
1311                                 if (HereDoc.Quote == '\\')
1312                                         st_new = SCE_PL_HERE_Q;
1313                         }
1314                         sc.SetState(st_new);
1315                 }
1316                 if (HereDoc.State == 3 && sc.atLineEnd) {
1317                         // Start of format body.
1318                         HereDoc.State = 0;
1319                         sc.SetState(SCE_PL_FORMAT);
1320                 }
1321
1322                 // Determine if a new state should be entered.
1323                 if (sc.state == SCE_PL_DEFAULT) {
1324                         if (IsADigit(sc.ch) ||
1325                                 (IsADigit(sc.chNext) && (sc.ch == '.' || sc.ch == 'v'))) {
1326                                 sc.SetState(SCE_PL_NUMBER);
1327                                 backFlag = BACK_NONE;
1328                                 numState = PERLNUM_DECIMAL;
1329                                 dotCount = 0;
1330                                 if (sc.ch == '0') {             // hex,bin,octal
1331                                         if (sc.chNext == 'x' || sc.chNext == 'X') {
1332                                                 numState = PERLNUM_HEX;
1333                                         } else if (sc.chNext == 'b' || sc.chNext == 'B') {
1334                                                 numState = PERLNUM_BINARY;
1335                                         } else if (IsADigit(sc.chNext)) {
1336                                                 numState = PERLNUM_OCTAL;
1337                                         }
1338                                         if (numState != PERLNUM_DECIMAL) {
1339                                                 sc.Forward();
1340                                         }
1341                                 } else if (sc.ch == 'v') {              // vector
1342                                         numState = PERLNUM_V_VECTOR;
1343                                 }
1344                         } else if (setWord.Contains(sc.ch)) {
1345                                 // if immediately prefixed by '::', always a bareword
1346                                 sc.SetState(SCE_PL_WORD);
1347                                 if (sc.chPrev == ':' && sc.GetRelative(-2) == ':') {
1348                                         sc.ChangeState(SCE_PL_IDENTIFIER);
1349                                 }
1350                                 Sci_PositionU bk = sc.currentPos;
1351                                 Sci_PositionU fw = sc.currentPos + 1;
1352                                 // first check for possible quote-like delimiter
1353                                 if (sc.ch == 's' && !setWord.Contains(sc.chNext)) {
1354                                         sc.ChangeState(SCE_PL_REGSUBST);
1355                                         Quote.New(2);
1356                                 } else if (sc.ch == 'm' && !setWord.Contains(sc.chNext)) {
1357                                         sc.ChangeState(SCE_PL_REGEX);
1358                                         Quote.New();
1359                                 } else if (sc.ch == 'q' && !setWord.Contains(sc.chNext)) {
1360                                         sc.ChangeState(SCE_PL_STRING_Q);
1361                                         Quote.New();
1362                                 } else if (sc.ch == 'y' && !setWord.Contains(sc.chNext)) {
1363                                         sc.ChangeState(SCE_PL_XLAT);
1364                                         Quote.New(2);
1365                                 } else if (sc.Match('t', 'r') && !setWord.Contains(sc.GetRelative(2))) {
1366                                         sc.ChangeState(SCE_PL_XLAT);
1367                                         Quote.New(2);
1368                                         sc.Forward();
1369                                         fw++;
1370                                 } else if (sc.ch == 'q' && setQDelim.Contains(sc.chNext)
1371                                         && !setWord.Contains(sc.GetRelative(2))) {
1372                                         if (sc.chNext == 'q') sc.ChangeState(SCE_PL_STRING_QQ);
1373                                         else if (sc.chNext == 'x') sc.ChangeState(SCE_PL_STRING_QX);
1374                                         else if (sc.chNext == 'r') sc.ChangeState(SCE_PL_STRING_QR);
1375                                         else sc.ChangeState(SCE_PL_STRING_QW);  // sc.chNext == 'w'
1376                                         Quote.New();
1377                                         sc.Forward();
1378                                         fw++;
1379                                 } else if (sc.ch == 'x' && (sc.chNext == '=' || // repetition
1380                                         !setWord.Contains(sc.chNext) ||
1381                                         (setRepetition.Contains(sc.chPrev) && IsADigit(sc.chNext)))) {
1382                                         sc.ChangeState(SCE_PL_OPERATOR);
1383                                 }
1384                                 // if potentially a keyword, scan forward and grab word, then check
1385                                 // if it's really one; if yes, disambiguation test is performed
1386                                 // otherwise it is always a bareword and we skip a lot of scanning
1387                                 if (sc.state == SCE_PL_WORD) {
1388                                         while (setWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(fw))))
1389                                                 fw++;
1390                                         if (!isPerlKeyword(styler.GetStartSegment(), fw, keywords, styler)) {
1391                                                 sc.ChangeState(SCE_PL_IDENTIFIER);
1392                                         }
1393                                 }
1394                                 // if already SCE_PL_IDENTIFIER, then no ambiguity, skip this
1395                                 // for quote-like delimiters/keywords, attempt to disambiguate
1396                                 // to select for bareword, change state -> SCE_PL_IDENTIFIER
1397                                 if (sc.state != SCE_PL_IDENTIFIER && bk > 0) {
1398                                         if (disambiguateBareword(styler, bk, fw, backFlag, backPos, endPos))
1399                                                 sc.ChangeState(SCE_PL_IDENTIFIER);
1400                                 }
1401                                 backFlag = BACK_NONE;
1402                         } else if (sc.ch == '#') {
1403                                 sc.SetState(SCE_PL_COMMENTLINE);
1404                         } else if (sc.ch == '\"') {
1405                                 sc.SetState(SCE_PL_STRING);
1406                                 Quote.New();
1407                                 Quote.Open(sc.ch);
1408                                 backFlag = BACK_NONE;
1409                         } else if (sc.ch == '\'') {
1410                                 if (sc.chPrev == '&' && setWordStart.Contains(sc.chNext)) {
1411                                         // Archaic call
1412                                         sc.SetState(SCE_PL_IDENTIFIER);
1413                                 } else {
1414                                         sc.SetState(SCE_PL_CHARACTER);
1415                                         Quote.New();
1416                                         Quote.Open(sc.ch);
1417                                 }
1418                                 backFlag = BACK_NONE;
1419                         } else if (sc.ch == '`') {
1420                                 sc.SetState(SCE_PL_BACKTICKS);
1421                                 Quote.New();
1422                                 Quote.Open(sc.ch);
1423                                 backFlag = BACK_NONE;
1424                         } else if (sc.ch == '$') {
1425                                 sc.SetState(SCE_PL_SCALAR);
1426                                 if (sc.chNext == '{') {
1427                                         sc.ForwardSetState(SCE_PL_OPERATOR);
1428                                 } else if (IsASpace(sc.chNext)) {
1429                                         sc.ForwardSetState(SCE_PL_DEFAULT);
1430                                 } else {
1431                                         sc.Forward();
1432                                         if (sc.Match('`', '`') || sc.Match(':', ':')) {
1433                                                 sc.Forward();
1434                                         }
1435                                 }
1436                                 backFlag = BACK_NONE;
1437                         } else if (sc.ch == '@') {
1438                                 sc.SetState(SCE_PL_ARRAY);
1439                                 if (setArray.Contains(sc.chNext)) {
1440                                         // no special treatment
1441                                 } else if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1442                                         sc.ForwardBytes(2);
1443                                 } else if (sc.chNext == '{' || sc.chNext == '[') {
1444                                         sc.ForwardSetState(SCE_PL_OPERATOR);
1445                                 } else {
1446                                         sc.ChangeState(SCE_PL_OPERATOR);
1447                                 }
1448                                 backFlag = BACK_NONE;
1449                         } else if (setPreferRE.Contains(sc.ch)) {
1450                                 // Explicit backward peeking to set a consistent preferRE for
1451                                 // any slash found, so no longer need to track preferRE state.
1452                                 // Find first previous significant lexed element and interpret.
1453                                 // A few symbols share this code for disambiguation.
1454                                 bool preferRE = false;
1455                                 bool isHereDoc = sc.Match('<', '<');
1456                                 bool hereDocSpace = false;              // for: SCALAR [whitespace] '<<'
1457                                 Sci_PositionU bk = (sc.currentPos > 0) ? sc.currentPos - 1: 0;
1458                                 sc.Complete();
1459                                 styler.Flush();
1460                                 if (styler.StyleAt(bk) == SCE_PL_DEFAULT)
1461                                         hereDocSpace = true;
1462                                 skipWhitespaceComment(styler, bk);
1463                                 if (bk == 0) {
1464                                         // avoid backward scanning breakage
1465                                         preferRE = true;
1466                                 } else {
1467                                         int bkstyle = styler.StyleAt(bk);
1468                                         int bkch = static_cast<unsigned char>(styler.SafeGetCharAt(bk));
1469                                         switch (bkstyle) {
1470                                         case SCE_PL_OPERATOR:
1471                                                 preferRE = true;
1472                                                 if (bkch == ')' || bkch == ']') {
1473                                                         preferRE = false;
1474                                                 } else if (bkch == '}') {
1475                                                         // backtrack by counting balanced brace pairs
1476                                                         // needed to test for variables like ${}, @{} etc.
1477                                                         bkstyle = styleBeforeBracePair(styler, bk);
1478                                                         if (bkstyle == SCE_PL_SCALAR
1479                                                                 || bkstyle == SCE_PL_ARRAY
1480                                                                 || bkstyle == SCE_PL_HASH
1481                                                                 || bkstyle == SCE_PL_SYMBOLTABLE
1482                                                                 || bkstyle == SCE_PL_OPERATOR) {
1483                                                                 preferRE = false;
1484                                                         }
1485                                                 } else if (bkch == '+' || bkch == '-') {
1486                                                         if (bkch == static_cast<unsigned char>(styler.SafeGetCharAt(bk - 1))
1487                                                                 && bkch != static_cast<unsigned char>(styler.SafeGetCharAt(bk - 2)))
1488                                                                 // exceptions for operators: unary suffixes ++, --
1489                                                                 preferRE = false;
1490                                                 }
1491                                                 break;
1492                                         case SCE_PL_IDENTIFIER:
1493                                                 preferRE = true;
1494                                                 bkstyle = styleCheckIdentifier(styler, bk);
1495                                                 if ((bkstyle == 1) || (bkstyle == 2)) {
1496                                                         // inputsymbol or var with "->" or "::" before identifier
1497                                                         preferRE = false;
1498                                                 } else if (bkstyle == 3) {
1499                                                         // bare identifier, test cases follow:
1500                                                         if (sc.ch == '/') {
1501                                                                 // if '/', /PATTERN/ unless digit/space immediately after '/'
1502                                                                 // if '//', always expect defined-or operator to follow identifier
1503                                                                 if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/')
1504                                                                         preferRE = false;
1505                                                         } else if (sc.ch == '*' || sc.ch == '%') {
1506                                                                 if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*'))
1507                                                                         preferRE = false;
1508                                                         } else if (sc.ch == '<') {
1509                                                                 if (IsASpace(sc.chNext) || sc.chNext == '=')
1510                                                                         preferRE = false;
1511                                                         }
1512                                                 }
1513                                                 break;
1514                                         case SCE_PL_SCALAR:             // for $var<< case:
1515                                                 if (isHereDoc && hereDocSpace)  // if SCALAR whitespace '<<', *always* a HERE doc
1516                                                         preferRE = true;
1517                                                 break;
1518                                         case SCE_PL_WORD:
1519                                                 preferRE = true;
1520                                                 // for HERE docs, always true
1521                                                 if (sc.ch == '/') {
1522                                                         // adopt heuristics similar to vim-style rules:
1523                                                         // keywords always forced as /PATTERN/: split, if, elsif, while
1524                                                         // everything else /PATTERN/ unless digit/space immediately after '/'
1525                                                         // for '//', defined-or favoured unless special keywords
1526                                                         Sci_PositionU bkend = bk + 1;
1527                                                         while (bk > 0 && styler.StyleAt(bk - 1) == SCE_PL_WORD) {
1528                                                                 bk--;
1529                                                         }
1530                                                         if (isPerlKeyword(bk, bkend, reWords, styler))
1531                                                                 break;
1532                                                         if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.chNext == '/')
1533                                                                 preferRE = false;
1534                                                 } else if (sc.ch == '*' || sc.ch == '%') {
1535                                                         if (IsASpace(sc.chNext) || IsADigit(sc.chNext) || sc.Match('*', '*'))
1536                                                                 preferRE = false;
1537                                                 } else if (sc.ch == '<') {
1538                                                         if (IsASpace(sc.chNext) || sc.chNext == '=')
1539                                                                 preferRE = false;
1540                                                 }
1541                                                 break;
1542
1543                                                 // other styles use the default, preferRE=false
1544                                         case SCE_PL_POD:
1545                                         case SCE_PL_HERE_Q:
1546                                         case SCE_PL_HERE_QQ:
1547                                         case SCE_PL_HERE_QX:
1548                                                 preferRE = true;
1549                                                 break;
1550                                         }
1551                                 }
1552                                 backFlag = BACK_NONE;
1553                                 if (isHereDoc) {        // handle '<<', HERE doc
1554                                         if (sc.Match("<<>>")) {         // double-diamond operator (5.22)
1555                                                 sc.SetState(SCE_PL_OPERATOR);
1556                                                 sc.Forward(3);
1557                                         } else if (preferRE) {
1558                                                 sc.SetState(SCE_PL_HERE_DELIM);
1559                                                 HereDoc.State = 0;
1560                                         } else {                // << operator
1561                                                 sc.SetState(SCE_PL_OPERATOR);
1562                                                 sc.Forward();
1563                                         }
1564                                 } else if (sc.ch == '*') {      // handle '*', typeglob
1565                                         if (preferRE) {
1566                                                 sc.SetState(SCE_PL_SYMBOLTABLE);
1567                                                 if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1568                                                         sc.ForwardBytes(2);
1569                                                 } else if (sc.chNext == '{') {
1570                                                         sc.ForwardSetState(SCE_PL_OPERATOR);
1571                                                 } else {
1572                                                         sc.Forward();
1573                                                 }
1574                                         } else {
1575                                                 sc.SetState(SCE_PL_OPERATOR);
1576                                                 if (sc.chNext == '*')   // exponentiation
1577                                                         sc.Forward();
1578                                         }
1579                                 } else if (sc.ch == '%') {      // handle '%', hash
1580                                         if (preferRE) {
1581                                                 sc.SetState(SCE_PL_HASH);
1582                                                 if (setHash.Contains(sc.chNext)) {
1583                                                         sc.Forward();
1584                                                 } else if (sc.chNext == ':' && sc.GetRelative(2) == ':') {
1585                                                         sc.ForwardBytes(2);
1586                                                 } else if (sc.chNext == '{') {
1587                                                         sc.ForwardSetState(SCE_PL_OPERATOR);
1588                                                 } else {
1589                                                         sc.ChangeState(SCE_PL_OPERATOR);
1590                                                 }
1591                                         } else {
1592                                                 sc.SetState(SCE_PL_OPERATOR);
1593                                         }
1594                                 } else if (sc.ch == '<') {      // handle '<', inputsymbol
1595                                         if (preferRE) {
1596                                                 // forward scan
1597                                                 int i = InputSymbolScan(sc);
1598                                                 if (i > 0) {
1599                                                         sc.SetState(SCE_PL_IDENTIFIER);
1600                                                         sc.Forward(i);
1601                                                 } else {
1602                                                         sc.SetState(SCE_PL_OPERATOR);
1603                                                 }
1604                                         } else {
1605                                                 sc.SetState(SCE_PL_OPERATOR);
1606                                         }
1607                                 } else {                        // handle '/', regexp
1608                                         if (preferRE) {
1609                                                 sc.SetState(SCE_PL_REGEX);
1610                                                 Quote.New();
1611                                                 Quote.Open(sc.ch);
1612                                         } else {                // / and // operators
1613                                                 sc.SetState(SCE_PL_OPERATOR);
1614                                                 if (sc.chNext == '/') {
1615                                                         sc.Forward();
1616                                                 }
1617                                         }
1618                                 }
1619                         } else if (sc.ch == '='         // POD
1620                                 && setPOD.Contains(sc.chNext)
1621                                 && sc.atLineStart) {
1622                                 sc.SetState(SCE_PL_POD);
1623                                 backFlag = BACK_NONE;
1624                         } else if (sc.ch == '-' && setWordStart.Contains(sc.chNext)) {  // extended '-' cases
1625                                 Sci_PositionU bk = sc.currentPos;
1626                                 Sci_PositionU fw = 2;
1627                                 if (setSingleCharOp.Contains(sc.chNext) &&      // file test operators
1628                                         !setWord.Contains(sc.GetRelative(2))) {
1629                                         sc.SetState(SCE_PL_WORD);
1630                                 } else {
1631                                         // nominally a minus and bareword; find extent of bareword
1632                                         while (setWord.Contains(sc.GetRelative(fw)))
1633                                                 fw++;
1634                                         sc.SetState(SCE_PL_OPERATOR);
1635                                 }
1636                                 // force to bareword for hash key => or {variable literal} cases
1637                                 if (disambiguateBareword(styler, bk, bk + fw, backFlag, backPos, endPos) & 2) {
1638                                         sc.ChangeState(SCE_PL_IDENTIFIER);
1639                                 }
1640                                 backFlag = BACK_NONE;
1641                         } else if (sc.ch == '(' && sc.currentPos > 0) { // '(' or subroutine prototype
1642                                 sc.Complete();
1643                                 if (styleCheckSubPrototype(styler, sc.currentPos - 1)) {
1644                                         sc.SetState(SCE_PL_SUB_PROTOTYPE);
1645                                         backFlag = BACK_NONE;
1646                                 } else {
1647                                         sc.SetState(SCE_PL_OPERATOR);
1648                                 }
1649                         } else if (setPerlOperator.Contains(sc.ch)) {   // operators
1650                                 sc.SetState(SCE_PL_OPERATOR);
1651                                 if (sc.Match('.', '.')) {       // .. and ...
1652                                         sc.Forward();
1653                                         if (sc.chNext == '.') sc.Forward();
1654                                 }
1655                         } else if (sc.ch == 4 || sc.ch == 26) {         // ^D and ^Z ends valid perl source
1656                                 sc.SetState(SCE_PL_DATASECTION);
1657                         } else {
1658                                 // keep colouring defaults
1659                                 sc.Complete();
1660                         }
1661                 }
1662         }
1663         sc.Complete();
1664         if (sc.state == SCE_PL_HERE_Q
1665                 || sc.state == SCE_PL_HERE_QQ
1666                 || sc.state == SCE_PL_HERE_QX
1667                 || sc.state == SCE_PL_FORMAT) {
1668                 styler.ChangeLexerState(sc.currentPos, styler.Length());
1669         }
1670         sc.Complete();
1671 }
1672
// Bit field inside a fold level that Fold() uses for POD headings: the value
// obtained for a "=head" line is shifted left by PERL_HEADFOLD_SHIFT so that it
// lands in the bits selected by PERL_HEADFOLD_MASK (bits 7-4).
// Typed constants are used instead of macros so the names obey normal scoping.
constexpr int PERL_HEADFOLD_SHIFT = 4;
constexpr int PERL_HEADFOLD_MASK = 0xF0;
1675
1676 void SCI_METHOD LexerPerl::Fold(Sci_PositionU startPos, Sci_Position length, int /* initStyle */, IDocument *pAccess) {
1677
1678         if (!options.fold)
1679                 return;
1680
1681         LexAccessor styler(pAccess);
1682
1683         Sci_PositionU endPos = startPos + length;
1684         int visibleChars = 0;
1685         Sci_Position lineCurrent = styler.GetLine(startPos);
1686
1687         // Backtrack to previous line in case need to fix its fold status
1688         if (startPos > 0) {
1689                 if (lineCurrent > 0) {
1690                         lineCurrent--;
1691                         startPos = styler.LineStart(lineCurrent);
1692                 }
1693         }
1694
1695         int levelPrev = SC_FOLDLEVELBASE;
1696         if (lineCurrent > 0)
1697                 levelPrev = styler.LevelAt(lineCurrent - 1) >> 16;
1698         int levelCurrent = levelPrev;
1699         char chNext = styler[startPos];
1700         char chPrev = styler.SafeGetCharAt(startPos - 1);
1701         int styleNext = styler.StyleAt(startPos);
1702         // Used at end of line to determine if the line was a package definition
1703         bool isPackageLine = false;
1704         int podHeading = 0;
1705         for (Sci_PositionU i = startPos; i < endPos; i++) {
1706                 char ch = chNext;
1707                 chNext = styler.SafeGetCharAt(i + 1);
1708                 int style = styleNext;
1709                 styleNext = styler.StyleAt(i + 1);
1710                 int stylePrevCh = (i) ? styler.StyleAt(i - 1):SCE_PL_DEFAULT;
1711                 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
1712                 bool atLineStart = ((chPrev == '\r') || (chPrev == '\n')) || i == 0;
1713                 // Comment folding
1714                 if (options.foldComment && atEOL && IsCommentLine(lineCurrent, styler)) {
1715                         if (!IsCommentLine(lineCurrent - 1, styler)
1716                                 && IsCommentLine(lineCurrent + 1, styler))
1717                                 levelCurrent++;
1718                         else if (IsCommentLine(lineCurrent - 1, styler)
1719                                 && !IsCommentLine(lineCurrent + 1, styler))
1720                                 levelCurrent--;
1721                 }
1722                 // {} [] block folding
1723                 if (style == SCE_PL_OPERATOR) {
1724                         if (ch == '{') {
1725                                 if (options.foldAtElse && levelCurrent < levelPrev)
1726                                         --levelPrev;
1727                                 levelCurrent++;
1728                         } else if (ch == '}') {
1729                                 levelCurrent--;
1730                         }
1731                         if (ch == '[') {
1732                                 if (options.foldAtElse && levelCurrent < levelPrev)
1733                                         --levelPrev;
1734                                 levelCurrent++;
1735                         } else if (ch == ']') {
1736                                 levelCurrent--;
1737                         }
1738                 } else if (style == SCE_PL_STRING_QW) {
1739                         // qw
1740                         if (stylePrevCh != style)
1741                                 levelCurrent++;
1742                         else if (styleNext != style)
1743                                 levelCurrent--;
1744                 }
1745                 // POD folding
1746                 if (options.foldPOD && atLineStart) {
1747                         if (style == SCE_PL_POD) {
1748                                 if (stylePrevCh != SCE_PL_POD && stylePrevCh != SCE_PL_POD_VERB)
1749                                         levelCurrent++;
1750                                 else if (styler.Match(i, "=cut"))
1751                                         levelCurrent = (levelCurrent & ~PERL_HEADFOLD_MASK) - 1;
1752                                 else if (styler.Match(i, "=head"))
1753                                         podHeading = PodHeadingLevel(i, styler);
1754                         } else if (style == SCE_PL_DATASECTION) {
1755                                 if (ch == '=' && IsASCII(chNext) && isalpha(chNext) && levelCurrent == SC_FOLDLEVELBASE)
1756                                         levelCurrent++;
1757                                 else if (styler.Match(i, "=cut") && levelCurrent > SC_FOLDLEVELBASE)
1758                                         levelCurrent = (levelCurrent & ~PERL_HEADFOLD_MASK) - 1;
1759                                 else if (styler.Match(i, "=head"))
1760                                         podHeading = PodHeadingLevel(i, styler);
1761                                 // if package used or unclosed brace, level > SC_FOLDLEVELBASE!
1762                                 // reset needed as level test is vs. SC_FOLDLEVELBASE
1763                                 else if (stylePrevCh != SCE_PL_DATASECTION)
1764                                         levelCurrent = SC_FOLDLEVELBASE;
1765                         }
1766                 }
1767                 // package folding
1768                 if (options.foldPackage && atLineStart) {
1769                         if (IsPackageLine(lineCurrent, styler)
1770                                 && !IsPackageLine(lineCurrent + 1, styler))
1771                                 isPackageLine = true;
1772                 }
1773
1774                 //heredoc folding
1775                 switch (style) {
1776                 case SCE_PL_HERE_QQ :
1777                 case SCE_PL_HERE_Q :
1778                 case SCE_PL_HERE_QX :
1779                         switch (stylePrevCh) {
1780                         case SCE_PL_HERE_QQ :
1781                         case SCE_PL_HERE_Q :
1782                         case SCE_PL_HERE_QX :
1783                                 //do nothing;
1784                                 break;
1785                         default :
1786                                 levelCurrent++;
1787                                 break;
1788                         }
1789                         break;
1790                 default:
1791                         switch (stylePrevCh) {
1792                         case SCE_PL_HERE_QQ :
1793                         case SCE_PL_HERE_Q :
1794                         case SCE_PL_HERE_QX :
1795                                 levelCurrent--;
1796                                 break;
1797                         default :
1798                                 //do nothing;
1799                                 break;
1800                         }
1801                         break;
1802                 }
1803
1804                 //explicit folding
1805                 if (options.foldCommentExplicit && style == SCE_PL_COMMENTLINE && ch == '#') {
1806                         if (chNext == '{') {
1807                                 levelCurrent++;
1808                         } else if (levelCurrent > SC_FOLDLEVELBASE  && chNext == '}') {
1809                                 levelCurrent--;
1810                         }
1811                 }
1812
1813                 if (atEOL) {
1814                         int lev = levelPrev;
1815                         // POD headings occupy bits 7-4, leaving some breathing room for
1816                         // non-standard practice -- POD sections stuck in blocks, etc.
1817                         if (podHeading > 0) {
1818                                 levelCurrent = (lev & ~PERL_HEADFOLD_MASK) | (podHeading << PERL_HEADFOLD_SHIFT);
1819                                 lev = levelCurrent - 1;
1820                                 lev |= SC_FOLDLEVELHEADERFLAG;
1821                                 podHeading = 0;
1822                         }
1823                         // Check if line was a package declaration
1824                         // because packages need "special" treatment
1825                         if (isPackageLine) {
1826                                 lev = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG;
1827                                 levelCurrent = SC_FOLDLEVELBASE + 1;
1828                                 isPackageLine = false;
1829                         }
1830                         lev |= levelCurrent << 16;
1831                         if (visibleChars == 0 && options.foldCompact)
1832                                 lev |= SC_FOLDLEVELWHITEFLAG;
1833                         if ((levelCurrent > levelPrev) && (visibleChars > 0))
1834                                 lev |= SC_FOLDLEVELHEADERFLAG;
1835                         if (lev != styler.LevelAt(lineCurrent)) {
1836                                 styler.SetLevel(lineCurrent, lev);
1837                         }
1838                         lineCurrent++;
1839                         levelPrev = levelCurrent;
1840                         visibleChars = 0;
1841                 }
1842                 if (!isspacechar(ch))
1843                         visibleChars++;
1844                 chPrev = ch;
1845         }
1846         // Fill in the real level of the next line, keeping the current flags as they will be filled in later
1847         int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
1848         styler.SetLevel(lineCurrent, levelPrev | flagsNext);
1849 }
1850
// Lexer module object for Perl: constructed from the SCLEX_PERL identifier, the
// LexerPerl::LexerFactoryPerl factory function, the name "perl" and the
// perlWordListDesc table.
LexerModule lmPerl(SCLEX_PERL, LexerPerl::LexerFactoryPerl, "perl", perlWordListDesc);