scintilla/lexilla/lexers/LexPython.cxx

   1 // Scintilla source code edit control
   2 /** @file LexPython.cxx
   3  ** Lexer for Python.
   4  **/
   5 // Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
   6 // The License.txt file describes the conditions under which this software may be distributed.
   7
   8 #include <cstdlib>
   9 #include <cassert>
  10 #include <cstring>
  11
  12 #include <string>
  13 #include <string_view>
  14 #include <vector>
  15 #include <map>
  16 #include <algorithm>
  17 #include <functional>
  18
  19 #include "ILexer.h"
  20 #include "Scintilla.h"
  21 #include "SciLexer.h"
  22
  23 #include "StringCopy.h"
  24 #include "WordList.h"
  25 #include "LexAccessor.h"
  26 #include "Accessor.h"
  27 #include "StyleContext.h"
  28 #include "CharacterSet.h"
  29 #include "CharacterCategory.h"
  30 #include "LexerModule.h"
  31 #include "OptionSet.h"
  32 #include "SubStyles.h"
  33 #include "DefaultLexer.h"
  34
  35 using namespace Scintilla;
  36 using namespace Lexilla;
  37
  38 namespace {
  39 // Use an unnamed namespace to protect the functions and classes from name conflicts
  40
  41 /* Notes on f-strings: f-strings are strings prefixed with f (e.g. f'') that may
  42    have arbitrary expressions in {}.  The tokens in the expressions are lexed as if
  43    they were outside of any string.  Expressions may contain { and } characters as
  44    long as there is a closing } for every {, may be 2+ lines in a triple quoted
  45    string, and may have a formatting specifier following a ! or :, but both !
  46    and : are valid inside of a bracketed expression and != is a valid
  47    expression token even outside of a bracketed expression.
  48
  49    When in an f-string expression, the lexer keeps track of the state value of
  50    the f-string and the nesting count for the expression (# of [, (, { seen - # of
  51    }, ), ] seen).  f-strings may be nested (e.g. f'{ a + f"{1+2}"') so a stack of
  52    states and nesting counts is kept.  If a f-string expression continues beyond
  53    the end of a line, this stack is saved in a std::map that maps a line number to
  54    the stack at the end of that line.  std::vector is used for the stack.
  55
  56    The PEP for f-strings is at https://www.python.org/dev/peps/pep-0498/
  57 */
  58 struct SingleFStringExpState {
  59         int state;
  60         int nestingCount;
  61 };
  62
  63 /* kwCDef, kwCTypeName only used for Cython */
  64 enum kwType { kwOther, kwClass, kwDef, kwImport, kwCDef, kwCTypeName, kwCPDef };
  65
  66 enum literalsAllowed { litNone = 0, litU = 1, litB = 2, litF = 4 };
  67
  68 constexpr int indicatorWhitespace = 1;
  69
  70 bool IsPyComment(Accessor &styler, Sci_Position pos, Sci_Position len) {
  71         return len > 0 && styler[pos] == '#';
  72 }
  73
  74 bool IsPyStringTypeChar(int ch, literalsAllowed allowed) noexcept {
  75         return
  76                 ((allowed & litB) && (ch == 'b' || ch == 'B')) ||
  77                 ((allowed & litU) && (ch == 'u' || ch == 'U')) ||
  78                 ((allowed & litF) && (ch == 'f' || ch == 'F'));
  79 }
  80
  81 bool IsPyStringStart(int ch, int chNext, int chNext2, literalsAllowed allowed) noexcept {
  82         if (ch == '\'' || ch == '"')
  83                 return true;
  84         if (IsPyStringTypeChar(ch, allowed)) {
  85                 if (chNext == '"' || chNext == '\'')
  86                         return true;
  87                 if ((chNext == 'r' || chNext == 'R') && (chNext2 == '"' || chNext2 == '\''))
  88                         return true;
  89         }
  90         if ((ch == 'r' || ch == 'R') && (chNext == '"' || chNext == '\''))
  91                 return true;
  92
  93         return false;
  94 }
  95
  96 bool IsPyFStringState(int st) noexcept {
  97         return ((st == SCE_P_FCHARACTER) || (st == SCE_P_FSTRING) ||
  98                 (st == SCE_P_FTRIPLE) || (st == SCE_P_FTRIPLEDOUBLE));
  99 }
 100
 101 bool IsPySingleQuoteStringState(int st) noexcept {
 102         return ((st == SCE_P_CHARACTER) || (st == SCE_P_STRING) ||
 103                 (st == SCE_P_FCHARACTER) || (st == SCE_P_FSTRING));
 104 }
 105
 106 bool IsPyTripleQuoteStringState(int st) noexcept {
 107         return ((st == SCE_P_TRIPLE) || (st == SCE_P_TRIPLEDOUBLE) ||
 108                 (st == SCE_P_FTRIPLE) || (st == SCE_P_FTRIPLEDOUBLE));
 109 }
 110
 111 char GetPyStringQuoteChar(int st) noexcept {
 112         if ((st == SCE_P_CHARACTER) || (st == SCE_P_FCHARACTER) ||
 113                         (st == SCE_P_TRIPLE) || (st == SCE_P_FTRIPLE))
 114                 return '\'';
 115         if ((st == SCE_P_STRING) || (st == SCE_P_FSTRING) ||
 116                         (st == SCE_P_TRIPLEDOUBLE) || (st == SCE_P_FTRIPLEDOUBLE))
 117                 return '"';
 118
 119         return '\0';
 120 }
 121
 122 void PushStateToStack(int state, std::vector<SingleFStringExpState> &stack, SingleFStringExpState *&currentFStringExp) {
 123         SingleFStringExpState single = {state, 0};
 124         stack.push_back(single);
 125
 126         currentFStringExp = &stack.back();
 127 }
 128
 129 int PopFromStateStack(std::vector<SingleFStringExpState> &stack, SingleFStringExpState *&currentFStringExp) noexcept {
 130         int state = 0;
 131
 132         if (!stack.empty()) {
 133                 state = stack.back().state;
 134                 stack.pop_back();
 135         }
 136
 137         if (stack.empty()) {
 138                 currentFStringExp = nullptr;
 139         } else {
 140                 currentFStringExp = &stack.back();
 141         }
 142
 143         return state;
 144 }
 145
 146 /* Return the state to use for the string starting at i; *nextIndex will be set to the first index following the quote(s) */
 147 int GetPyStringState(Accessor &styler, Sci_Position i, Sci_PositionU *nextIndex, literalsAllowed allowed) {
 148         char ch = styler.SafeGetCharAt(i);
 149         char chNext = styler.SafeGetCharAt(i + 1);
 150         const int firstIsF = (ch == 'f' || ch == 'F');
 151
 152         // Advance beyond r, u, or ur prefix (or r, b, or br in Python 2.7+ and r, f, or fr in Python 3.6+), but bail if there are any unexpected chars
 153         if (ch == 'r' || ch == 'R') {
 154                 i++;
 155                 ch = styler.SafeGetCharAt(i);
 156                 chNext = styler.SafeGetCharAt(i + 1);
 157         } else if (IsPyStringTypeChar(ch, allowed)) {
 158                 if (chNext == 'r' || chNext == 'R')
 159                         i += 2;
 160                 else
 161                         i += 1;
 162                 ch = styler.SafeGetCharAt(i);
 163                 chNext = styler.SafeGetCharAt(i + 1);
 164         }
 165
 166         if (ch != '"' && ch != '\'') {
 167                 *nextIndex = i + 1;
 168                 return SCE_P_DEFAULT;
 169         }
 170
 171         if (ch == chNext && ch == styler.SafeGetCharAt(i + 2)) {
 172                 *nextIndex = i + 3;
 173
 174                 if (ch == '"')
 175                         return (firstIsF ? SCE_P_FTRIPLEDOUBLE : SCE_P_TRIPLEDOUBLE);
 176                 else
 177                         return (firstIsF ? SCE_P_FTRIPLE : SCE_P_TRIPLE);
 178         } else {
 179                 *nextIndex = i + 1;
 180
 181                 if (ch == '"')
 182                         return (firstIsF ? SCE_P_FSTRING : SCE_P_STRING);
 183                 else
 184                         return (firstIsF ? SCE_P_FCHARACTER : SCE_P_CHARACTER);
 185         }
 186 }
 187
 188 inline bool IsAWordChar(int ch, bool unicodeIdentifiers) {
 189         if (IsASCII(ch))
 190                 return (IsAlphaNumeric(ch) || ch == '.' || ch == '_');
 191
 192         if (!unicodeIdentifiers)
 193                 return false;
 194
 195         // Python uses the XID_Continue set from Unicode data
 196         return IsXidContinue(ch);
 197 }
 198
 199 inline bool IsAWordStart(int ch, bool unicodeIdentifiers) {
 200         if (IsASCII(ch))
 201                 return (IsUpperOrLowerCase(ch) || ch == '_');
 202
 203         if (!unicodeIdentifiers)
 204                 return false;
 205
 206         // Python uses the XID_Start set from Unicode data
 207         return IsXidStart(ch);
 208 }
 209
 210 bool IsFirstNonWhitespace(Sci_Position pos, Accessor &styler) {
 211         const Sci_Position line = styler.GetLine(pos);
 212         const Sci_Position start_pos = styler.LineStart(line);
 213         for (Sci_Position i = start_pos; i < pos; i++) {
 214                 const char ch = styler[i];
 215                 if (!(ch == ' ' || ch == '\t'))
 216                         return false;
 217         }
 218         return true;
 219 }
 220
 221 // Options used for LexerPython
 222 struct OptionsPython {
 223         int whingeLevel;
 224         bool base2or8Literals;
 225         bool stringsU;
 226         bool stringsB;
 227         bool stringsF;
 228         bool stringsOverNewline;
 229         bool keywords2NoSubIdentifiers;
 230         bool fold;
 231         bool foldQuotes;
 232         bool foldCompact;
 233         bool unicodeIdentifiers;
 234
 235         OptionsPython() {
 236                 whingeLevel = 0;
 237                 base2or8Literals = true;
 238                 stringsU = true;
 239                 stringsB = true;
 240                 stringsF = true;
 241                 stringsOverNewline = false;
 242                 keywords2NoSubIdentifiers = false;
 243                 fold = false;
 244                 foldQuotes = false;
 245                 foldCompact = false;
 246                 unicodeIdentifiers = true;
 247         }
 248
 249         literalsAllowed AllowedLiterals() const noexcept {
 250                 literalsAllowed allowedLiterals = stringsU ? litU : litNone;
 251                 if (stringsB)
 252                         allowedLiterals = static_cast<literalsAllowed>(allowedLiterals | litB);
 253                 if (stringsF)
 254                         allowedLiterals = static_cast<literalsAllowed>(allowedLiterals | litF);
 255                 return allowedLiterals;
 256         }
 257 };
 258
 259 const char *const pythonWordListDesc[] = {
 260         "Keywords",
 261         "Highlighted identifiers",
 262         nullptr
 263 };
 264
 265 struct OptionSetPython : public OptionSet<OptionsPython> {
 266         OptionSetPython() {
 267                 DefineProperty("tab.timmy.whinge.level", &OptionsPython::whingeLevel,
 268                                "For Python code, checks whether indenting is consistent. "
 269                                "The default, 0 turns off indentation checking, "
 270                                "1 checks whether each line is potentially inconsistent with the previous line, "
 271                                "2 checks whether any space characters occur before a tab character in the indentation, "
 272                                "3 checks whether any spaces are in the indentation, and "
 273                                "4 checks for any tab characters in the indentation. "
 274                                "1 is a good level to use.");
 275
 276                 DefineProperty("lexer.python.literals.binary", &OptionsPython::base2or8Literals,
 277                                "Set to 0 to not recognise Python 3 binary and octal literals: 0b1011 0o712.");
 278
 279                 DefineProperty("lexer.python.strings.u", &OptionsPython::stringsU,
 280                                "Set to 0 to not recognise Python Unicode literals u\"x\" as used before Python 3.");
 281
 282                 DefineProperty("lexer.python.strings.b", &OptionsPython::stringsB,
 283                                "Set to 0 to not recognise Python 3 bytes literals b\"x\".");
 284
 285                 DefineProperty("lexer.python.strings.f", &OptionsPython::stringsF,
 286                                "Set to 0 to not recognise Python 3.6 f-string literals f\"var={var}\".");
 287
 288                 DefineProperty("lexer.python.strings.over.newline", &OptionsPython::stringsOverNewline,
 289                                "Set to 1 to allow strings to span newline characters.");
 290
 291                 DefineProperty("lexer.python.keywords2.no.sub.identifiers", &OptionsPython::keywords2NoSubIdentifiers,
 292                                "When enabled, it will not style keywords2 items that are used as a sub-identifier. "
 293                                "Example: when set, will not highlight \"foo.open\" when \"open\" is a keywords2 item.");
 294
 295                 DefineProperty("fold", &OptionsPython::fold);
 296
 297                 DefineProperty("fold.quotes.python", &OptionsPython::foldQuotes,
 298                                "This option enables folding multi-line quoted strings when using the Python lexer.");
 299
 300                 DefineProperty("fold.compact", &OptionsPython::foldCompact);
 301
 302                 DefineProperty("lexer.python.unicode.identifiers", &OptionsPython::unicodeIdentifiers,
 303                                "Set to 0 to not recognise Python 3 Unicode identifiers.");
 304
 305                 DefineWordListSets(pythonWordListDesc);
 306         }
 307 };
 308
 309 const char styleSubable[] = { SCE_P_IDENTIFIER, 0 };
 310
 311 LexicalClass lexicalClasses[] = {
 312         // Lexer Python SCLEX_PYTHON SCE_P_:
 313         0, "SCE_P_DEFAULT", "default", "White space",
 314         1, "SCE_P_COMMENTLINE", "comment line", "Comment",
 315         2, "SCE_P_NUMBER", "literal numeric", "Number",
 316         3, "SCE_P_STRING", "literal string", "String",
 317         4, "SCE_P_CHARACTER", "literal string", "Single quoted string",
 318         5, "SCE_P_WORD", "keyword", "Keyword",
 319         6, "SCE_P_TRIPLE", "literal string", "Triple quotes",
 320         7, "SCE_P_TRIPLEDOUBLE", "literal string", "Triple double quotes",
 321         8, "SCE_P_CLASSNAME", "identifier", "Class name definition",
 322         9, "SCE_P_DEFNAME", "identifier", "Function or method name definition",
 323         10, "SCE_P_OPERATOR", "operator", "Operators",
 324         11, "SCE_P_IDENTIFIER", "identifier", "Identifiers",
 325         12, "SCE_P_COMMENTBLOCK", "comment", "Comment-blocks",
 326         13, "SCE_P_STRINGEOL", "error literal string", "End of line where string is not closed",
 327         14, "SCE_P_WORD2", "identifier", "Highlighted identifiers",
 328         15, "SCE_P_DECORATOR", "preprocessor", "Decorators",
 329         16, "SCE_P_FSTRING", "literal string interpolated", "F-String",
 330         17, "SCE_P_FCHARACTER", "literal string interpolated", "Single quoted f-string",
 331         18, "SCE_P_FTRIPLE", "literal string interpolated", "Triple quoted f-string",
 332         19, "SCE_P_FTRIPLEDOUBLE", "literal string interpolated", "Triple double quoted f-string",
 333 };
 334
 335 }
 336
 337 class LexerPython : public DefaultLexer {
 338         WordList keywords;
 339         WordList keywords2;
 340         OptionsPython options;
 341         OptionSetPython osPython;
 342         enum { ssIdentifier };
 343         SubStyles subStyles;
 344         std::map<Sci_Position, std::vector<SingleFStringExpState> > ftripleStateAtEol;
 345 public:
 346         explicit LexerPython() :
 347                 DefaultLexer("python", SCLEX_PYTHON, lexicalClasses, ELEMENTS(lexicalClasses)),
 348                 subStyles(styleSubable, 0x80, 0x40, 0) {
 349         }
 350         ~LexerPython() override {
 351         }
 352         void SCI_METHOD Release() override {
 353                 delete this;
 354         }
 355         int SCI_METHOD Version() const override {
 356                 return lvRelease5;
 357         }
 358         const char *SCI_METHOD PropertyNames() override {
 359                 return osPython.PropertyNames();
 360         }
 361         int SCI_METHOD PropertyType(const char *name) override {
 362                 return osPython.PropertyType(name);
 363         }
 364         const char *SCI_METHOD DescribeProperty(const char *name) override {
 365                 return osPython.DescribeProperty(name);
 366         }
 367         Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
 368         const char * SCI_METHOD PropertyGet(const char *key) override {
 369                 return osPython.PropertyGet(key);
 370         }
 371         const char *SCI_METHOD DescribeWordListSets() override {
 372                 return osPython.DescribeWordListSets();
 373         }
 374         Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
 375         void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
 376         void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
 377
 378         void *SCI_METHOD PrivateCall(int, void *) override {
 379                 return nullptr;
 380         }
 381
 382         int SCI_METHOD LineEndTypesSupported() override {
 383                 return SC_LINE_END_TYPE_UNICODE;
 384         }
 385
 386         int SCI_METHOD AllocateSubStyles(int styleBase, int numberStyles) override {
 387                 return subStyles.Allocate(styleBase, numberStyles);
 388         }
 389         int SCI_METHOD SubStylesStart(int styleBase) override {
 390                 return subStyles.Start(styleBase);
 391         }
 392         int SCI_METHOD SubStylesLength(int styleBase) override {
 393                 return subStyles.Length(styleBase);
 394         }
 395         int SCI_METHOD StyleFromSubStyle(int subStyle) override {
 396                 const int styleBase = subStyles.BaseStyle(subStyle);
 397                 return styleBase;
 398         }
 399         int SCI_METHOD PrimaryStyleFromStyle(int style) override {
 400                 return style;
 401         }
 402         void SCI_METHOD FreeSubStyles() override {
 403                 subStyles.Free();
 404         }
 405         void SCI_METHOD SetIdentifiers(int style, const char *identifiers) override {
 406                 subStyles.SetIdentifiers(style, identifiers);
 407         }
 408         int SCI_METHOD DistanceToSecondaryStyles() override {
 409                 return 0;
 410         }
 411         const char *SCI_METHOD GetSubStyleBases() override {
 412                 return styleSubable;
 413         }
 414
 415         static ILexer5 *LexerFactoryPython() {
 416                 return new LexerPython();
 417         }
 418
 419 private:
 420         void ProcessLineEnd(StyleContext &sc, std::vector<SingleFStringExpState> &fstringStateStack, SingleFStringExpState *&currentFStringExp, bool &inContinuedString);
 421 };
 422
 423 Sci_Position SCI_METHOD LexerPython::PropertySet(const char *key, const char *val) {
 424         if (osPython.PropertySet(&options, key, val)) {
 425                 return 0;
 426         }
 427         return -1;
 428 }
 429
 430 Sci_Position SCI_METHOD LexerPython::WordListSet(int n, const char *wl) {
 431         WordList *wordListN = nullptr;
 432         switch (n) {
 433         case 0:
 434                 wordListN = &keywords;
 435                 break;
 436         case 1:
 437                 wordListN = &keywords2;
 438                 break;
 439         }
 440         Sci_Position firstModification = -1;
 441         if (wordListN) {
 442                 WordList wlNew;
 443                 wlNew.Set(wl);
 444                 if (*wordListN != wlNew) {
 445                         wordListN->Set(wl);
 446                         firstModification = 0;
 447                 }
 448         }
 449         return firstModification;
 450 }
 451
 452 void LexerPython::ProcessLineEnd(StyleContext &sc, std::vector<SingleFStringExpState> &fstringStateStack, SingleFStringExpState *&currentFStringExp, bool &inContinuedString) {
 453         long deepestSingleStateIndex = -1;
 454         unsigned long i;
 455
 456         // Find the deepest single quote state because that string will end; no \ continuation in f-string
 457         for (i = 0; i < fstringStateStack.size(); i++) {
 458                 if (IsPySingleQuoteStringState(fstringStateStack[i].state)) {
 459                         deepestSingleStateIndex = i;
 460                         break;
 461                 }
 462         }
 463
 464         if (deepestSingleStateIndex != -1) {
 465                 sc.SetState(fstringStateStack[deepestSingleStateIndex].state);
 466                 while (fstringStateStack.size() > static_cast<unsigned long>(deepestSingleStateIndex)) {
 467                         PopFromStateStack(fstringStateStack, currentFStringExp);
 468                 }
 469         }
 470         if (!fstringStateStack.empty()) {
 471                 std::pair<Sci_Position, std::vector<SingleFStringExpState> > val;
 472                 val.first = sc.currentLine;
 473                 val.second = fstringStateStack;
 474
 475                 ftripleStateAtEol.insert(val);
 476         }
 477
 478         if ((sc.state == SCE_P_DEFAULT)
 479                         || IsPyTripleQuoteStringState(sc.state)) {
 480                 // Perform colourisation of white space and triple quoted strings at end of each line to allow
 481                 // tab marking to work inside white space and triple quoted strings
 482                 sc.SetState(sc.state);
 483         }
 484         if (IsPySingleQuoteStringState(sc.state)) {
 485                 if (inContinuedString || options.stringsOverNewline) {
 486                         inContinuedString = false;
 487                 } else {
 488                         sc.ChangeState(SCE_P_STRINGEOL);
 489                         sc.ForwardSetState(SCE_P_DEFAULT);
 490                 }
 491         }
 492 }
 493
 494 void SCI_METHOD LexerPython::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
 495         Accessor styler(pAccess, nullptr);
 496
 497         // Track whether in f-string expression; vector is used for a stack to
 498         // handle nested f-strings such as f"""{f'''{f"{f'{1}'}"}'''}"""
 499         std::vector<SingleFStringExpState> fstringStateStack;
 500         SingleFStringExpState *currentFStringExp = nullptr;
 501
 502         const Sci_Position endPos = startPos + length;
 503
 504         // Backtrack to previous line in case need to fix its tab whinging
 505         Sci_Position lineCurrent = styler.GetLine(startPos);
 506         if (startPos > 0) {
 507                 if (lineCurrent > 0) {
 508                         lineCurrent--;
 509                         // Look for backslash-continued lines
 510                         while (lineCurrent > 0) {
 511                                 const Sci_Position eolPos = styler.LineStart(lineCurrent) - 1;
 512                                 const int eolStyle = styler.StyleAt(eolPos);
 513                                 if (eolStyle == SCE_P_STRING
 514                                                 || eolStyle == SCE_P_CHARACTER
 515                                                 || eolStyle == SCE_P_STRINGEOL) {
 516                                         lineCurrent -= 1;
 517                                 } else {
 518                                         break;
 519                                 }
 520                         }
 521                         startPos = styler.LineStart(lineCurrent);
 522                 }
 523                 initStyle = startPos == 0 ? SCE_P_DEFAULT : styler.StyleAt(startPos - 1);
 524         }
 525
 526         const literalsAllowed allowedLiterals = options.AllowedLiterals();
 527
 528         initStyle = initStyle & 31;
 529         if (initStyle == SCE_P_STRINGEOL) {
 530                 initStyle = SCE_P_DEFAULT;
 531         }
 532
 533         // Set up fstate stack from last line and remove any subsequent ftriple at eol states
 534         std::map<Sci_Position, std::vector<SingleFStringExpState> >::iterator it;
 535         it = ftripleStateAtEol.find(lineCurrent - 1);
 536         if (it != ftripleStateAtEol.end() && !it->second.empty()) {
 537                 fstringStateStack = it->second;
 538                 currentFStringExp = &fstringStateStack.back();
 539         }
 540         it = ftripleStateAtEol.lower_bound(lineCurrent);
 541         if (it != ftripleStateAtEol.end()) {
 542                 ftripleStateAtEol.erase(it, ftripleStateAtEol.end());
 543         }
 544
 545         kwType kwLast = kwOther;
 546         int spaceFlags = 0;
 547         styler.IndentAmount(lineCurrent, &spaceFlags, IsPyComment);
 548         bool base_n_number = false;
 549
 550         const WordClassifier &classifierIdentifiers = subStyles.Classifier(SCE_P_IDENTIFIER);
 551
 552         StyleContext sc(startPos, endPos - startPos, initStyle, styler);
 553
 554         bool indentGood = true;
 555         Sci_Position startIndicator = sc.currentPos;
 556         bool inContinuedString = false;
 557
 558         for (; sc.More(); sc.Forward()) {
 559
 560                 if (sc.atLineStart) {
 561                         styler.IndentAmount(lineCurrent, &spaceFlags, IsPyComment);
 562                         indentGood = true;
 563                         if (options.whingeLevel == 1) {
 564                                 indentGood = (spaceFlags & wsInconsistent) == 0;
 565                         } else if (options.whingeLevel == 2) {
 566                                 indentGood = (spaceFlags & wsSpaceTab) == 0;
 567                         } else if (options.whingeLevel == 3) {
 568                                 indentGood = (spaceFlags & wsSpace) == 0;
 569                         } else if (options.whingeLevel == 4) {
 570                                 indentGood = (spaceFlags & wsTab) == 0;
 571                         }
 572                         if (!indentGood) {
 573                                 styler.IndicatorFill(startIndicator, sc.currentPos, indicatorWhitespace, 0);
 574                                 startIndicator = sc.currentPos;
 575                         }
 576                 }
 577
 578                 if (sc.atLineEnd) {
 579                         ProcessLineEnd(sc, fstringStateStack, currentFStringExp, inContinuedString);
 580                         lineCurrent++;
 581                         if (!sc.More())
 582                                 break;
 583                 }
 584
 585                 bool needEOLCheck = false;
 586
 587
 588                 if (sc.state == SCE_P_OPERATOR) {
 589                         kwLast = kwOther;
 590                         sc.SetState(SCE_P_DEFAULT);
 591                 } else if (sc.state == SCE_P_NUMBER) {
 592                         if (!IsAWordChar(sc.ch, false) &&
 593                                         !(!base_n_number && ((sc.ch == '+' || sc.ch == '-') && (sc.chPrev == 'e' || sc.chPrev == 'E')))) {
 594                                 sc.SetState(SCE_P_DEFAULT);
 595                         }
 596                 } else if (sc.state == SCE_P_IDENTIFIER) {
 597                         if ((sc.ch == '.') || (!IsAWordChar(sc.ch, options.unicodeIdentifiers))) {
 598                                 char s[100];
 599                                 sc.GetCurrent(s, sizeof(s));
 600                                 int style = SCE_P_IDENTIFIER;
 601                                 if ((kwLast == kwImport) && (strcmp(s, "as") == 0)) {
 602                                         style = SCE_P_WORD;
 603                                 } else if (keywords.InList(s)) {
 604                                         style = SCE_P_WORD;
 605                                 } else if (kwLast == kwClass) {
 606                                         style = SCE_P_CLASSNAME;
 607                                 } else if (kwLast == kwDef) {
 608                                         style = SCE_P_DEFNAME;
 609                                 } else if (kwLast == kwCDef || kwLast == kwCPDef) {
 610                                         Sci_Position pos = sc.currentPos;
 611                                         unsigned char ch = styler.SafeGetCharAt(pos, '\0');
 612                                         while (ch != '\0') {
 613                                                 if (ch == '(') {
 614                                                         style = SCE_P_DEFNAME;
 615                                                         break;
 616                                                 } else if (ch == ':') {
 617                                                         style = SCE_P_CLASSNAME;
 618                                                         break;
 619                                                 } else if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') {
 620                                                         pos++;
 621                                                         ch = styler.SafeGetCharAt(pos, '\0');
 622                                                 } else {
 623                                                         break;
 624                                                 }
 625                                         }
 626                                 } else if (keywords2.InList(s)) {
 627                                         if (options.keywords2NoSubIdentifiers) {
 628                                                 // We don't want to highlight keywords2
 629                                                 // that are used as a sub-identifier,
 630                                                 // i.e. not open in "foo.open".
 631                                                 const Sci_Position pos = styler.GetStartSegment() - 1;
 632                                                 if (pos < 0 || (styler.SafeGetCharAt(pos, '\0') != '.'))
 633                                                         style = SCE_P_WORD2;
 634                                         } else {
 635                                                 style = SCE_P_WORD2;
 636                                         }
 637                                 } else {
 638                                         int subStyle = classifierIdentifiers.ValueFor(s);
 639                                         if (subStyle >= 0) {
 640                                                 style = subStyle;
 641                                         }
 642                                 }
 643                                 sc.ChangeState(style);
 644                                 sc.SetState(SCE_P_DEFAULT);
 645                                 if (style == SCE_P_WORD) {
 646                                         if (0 == strcmp(s, "class"))
 647                                                 kwLast = kwClass;
 648                                         else if (0 == strcmp(s, "def"))
 649                                                 kwLast = kwDef;
 650                                         else if (0 == strcmp(s, "import"))
 651                                                 kwLast = kwImport;
 652                                         else if (0 == strcmp(s, "cdef"))
 653                                                 kwLast = kwCDef;
 654                                         else if (0 == strcmp(s, "cpdef"))
 655                                                 kwLast = kwCPDef;
 656                                         else if (0 == strcmp(s, "cimport"))
 657                                                 kwLast = kwImport;
 658                                         else if (kwLast != kwCDef && kwLast != kwCPDef)
 659                                                 kwLast = kwOther;
 660                                 } else if (kwLast != kwCDef && kwLast != kwCPDef) {
 661                                         kwLast = kwOther;
 662                                 }
 663                         }
 664                 } else if ((sc.state == SCE_P_COMMENTLINE) || (sc.state == SCE_P_COMMENTBLOCK)) {
 665                         if (sc.ch == '\r' || sc.ch == '\n') {
 666                                 sc.SetState(SCE_P_DEFAULT);
 667                         }
 668                 } else if (sc.state == SCE_P_DECORATOR) {
 669                         if (!IsAWordStart(sc.ch, options.unicodeIdentifiers)) {
 670                                 sc.SetState(SCE_P_DEFAULT);
 671                         }
 672                 } else if (IsPySingleQuoteStringState(sc.state)) {
 673                         if (sc.ch == '\\') {
 674                                 if ((sc.chNext == '\r') && (sc.GetRelative(2) == '\n')) {
 675                                         sc.Forward();
 676                                 }
 677                                 if (sc.chNext == '\n' || sc.chNext == '\r') {
 678                                         inContinuedString = true;
 679                                 } else {
 680                                         // Don't roll over the newline.
 681                                         sc.Forward();
 682                                 }
 683                         } else if (sc.ch == GetPyStringQuoteChar(sc.state)) {
 684                                 sc.ForwardSetState(SCE_P_DEFAULT);
 685                                 needEOLCheck = true;
 686                         }
 687                 } else if ((sc.state == SCE_P_TRIPLE) || (sc.state == SCE_P_FTRIPLE)) {
 688                         if (sc.ch == '\\') {
 689                                 sc.Forward();
 690                         } else if (sc.Match(R"(''')")) {
 691                                 sc.Forward();
 692                                 sc.Forward();
 693                                 sc.ForwardSetState(SCE_P_DEFAULT);
 694                                 needEOLCheck = true;
 695                         }
 696                 } else if ((sc.state == SCE_P_TRIPLEDOUBLE) || (sc.state == SCE_P_FTRIPLEDOUBLE)) {
 697                         if (sc.ch == '\\') {
 698                                 sc.Forward();
 699                         } else if (sc.Match(R"(""")")) {
 700                                 sc.Forward();
 701                                 sc.Forward();
 702                                 sc.ForwardSetState(SCE_P_DEFAULT);
 703                                 needEOLCheck = true;
 704                         }
 705                 }
 706
 707                 // Note if used and not if else because string states also match
 708                 // some of the above clauses
 709                 if (IsPyFStringState(sc.state) && sc.ch == '{') {
 710                         if (sc.chNext == '{') {
 711                                 sc.Forward();
 712                         } else {
 713                                 PushStateToStack(sc.state, fstringStateStack, currentFStringExp);
 714                                 sc.ForwardSetState(SCE_P_DEFAULT);
 715                         }
 716                         needEOLCheck = true;
 717                 }
 718
 719                 // If in an f-string expression, check for the ending quote(s)
 720                 // and end f-string to handle syntactically incorrect cases like
 721                 // f'{' and f"""{"""
 722                 if (!fstringStateStack.empty() && (sc.ch == '\'' || sc.ch == '"')) {
 723                         long matching_stack_i = -1;
 724                         for (unsigned long stack_i = 0; stack_i < fstringStateStack.size() && matching_stack_i == -1; stack_i++) {
 725                                 const int stack_state = fstringStateStack[stack_i].state;
 726                                 const char quote = GetPyStringQuoteChar(stack_state);
 727                                 if (sc.ch == quote) {
 728                                         if (IsPySingleQuoteStringState(stack_state)) {
 729                                                 matching_stack_i = stack_i;
 730                                         } else if (quote == '"' ? sc.Match(R"(""")") : sc.Match("'''")) {
 731                                                 matching_stack_i = stack_i;
 732                                         }
 733                                 }
 734                         }
 735
 736                         if (matching_stack_i != -1) {
 737                                 sc.SetState(fstringStateStack[matching_stack_i].state);
 738                                 if (IsPyTripleQuoteStringState(fstringStateStack[matching_stack_i].state)) {
 739                                         sc.Forward();
 740                                         sc.Forward();
 741                                 }
 742                                 sc.ForwardSetState(SCE_P_DEFAULT);
 743                                 needEOLCheck = true;
 744
 745                                 while (fstringStateStack.size() > static_cast<unsigned long>(matching_stack_i)) {
 746                                         PopFromStateStack(fstringStateStack, currentFStringExp);
 747                                 }
 748                         }
 749                 }
 750                 // End of code to find the end of a state
 751
 752                 if (!indentGood && !IsASpaceOrTab(sc.ch)) {
 753                         styler.IndicatorFill(startIndicator, sc.currentPos, indicatorWhitespace, 1);
 754                         startIndicator = sc.currentPos;
 755                         indentGood = true;
 756                 }
 757
 758                 // One cdef or cpdef line, clear kwLast only at end of line
 759                 if ((kwLast == kwCDef || kwLast == kwCPDef) && sc.atLineEnd) {
 760                         kwLast = kwOther;
 761                 }
 762
 763                 // State exit code may have moved on to end of line
 764                 if (needEOLCheck && sc.atLineEnd) {
 765                         ProcessLineEnd(sc, fstringStateStack, currentFStringExp, inContinuedString);
 766                         lineCurrent++;
 767                         styler.IndentAmount(lineCurrent, &spaceFlags, IsPyComment);
 768                         if (!sc.More())
 769                                 break;
 770                 }
 771
 772                 // If in f-string expression, check for }, :, ! to resume f-string state or update nesting count
 773                 if (currentFStringExp && !IsPySingleQuoteStringState(sc.state) && !IsPyTripleQuoteStringState(sc.state)) {
 774                         if (currentFStringExp->nestingCount == 0 && (sc.ch == '}' || sc.ch == ':' || (sc.ch == '!' && sc.chNext != '='))) {
 775                                 sc.SetState(PopFromStateStack(fstringStateStack, currentFStringExp));
 776                         } else {
 777                                 if (sc.ch == '{' || sc.ch == '[' || sc.ch == '(') {
 778                                         currentFStringExp->nestingCount++;
 779                                 } else if (sc.ch == '}' || sc.ch == ']' || sc.ch == ')') {
 780                                         currentFStringExp->nestingCount--;
 781                                 }
 782                         }
 783                 }
 784
 785                 // Check for a new state starting character
 786                 if (sc.state == SCE_P_DEFAULT) {
 787                         if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
 788                                 if (sc.ch == '0' && (sc.chNext == 'x' || sc.chNext == 'X')) {
 789                                         base_n_number = true;
 790                                         sc.SetState(SCE_P_NUMBER);
 791                                 } else if (sc.ch == '0' &&
 792                                                 (sc.chNext == 'o' || sc.chNext == 'O' || sc.chNext == 'b' || sc.chNext == 'B')) {
 793                                         if (options.base2or8Literals) {
 794                                                 base_n_number = true;
 795                                                 sc.SetState(SCE_P_NUMBER);
 796                                         } else {
 797                                                 sc.SetState(SCE_P_NUMBER);
 798                                                 sc.ForwardSetState(SCE_P_IDENTIFIER);
 799                                         }
 800                                 } else {
 801                                         base_n_number = false;
 802                                         sc.SetState(SCE_P_NUMBER);
 803                                 }
 804                         } else if (isoperator(sc.ch) || sc.ch == '`') {
 805                                 sc.SetState(SCE_P_OPERATOR);
 806                         } else if (sc.ch == '#') {
 807                                 sc.SetState(sc.chNext == '#' ? SCE_P_COMMENTBLOCK : SCE_P_COMMENTLINE);
 808                         } else if (sc.ch == '@') {
 809                                 if (IsFirstNonWhitespace(sc.currentPos, styler))
 810                                         sc.SetState(SCE_P_DECORATOR);
 811                                 else
 812                                         sc.SetState(SCE_P_OPERATOR);
 813                         } else if (IsPyStringStart(sc.ch, sc.chNext, sc.GetRelative(2), allowedLiterals)) {
 814                                 Sci_PositionU nextIndex = 0;
 815                                 sc.SetState(GetPyStringState(styler, sc.currentPos, &nextIndex, allowedLiterals));
 816                                 while (nextIndex > (sc.currentPos + 1) && sc.More()) {
 817                                         sc.Forward();
 818                                 }
 819                         } else if (IsAWordStart(sc.ch, options.unicodeIdentifiers)) {
 820                                 sc.SetState(SCE_P_IDENTIFIER);
 821                         }
 822                 }
 823         }
 824         styler.IndicatorFill(startIndicator, sc.currentPos, indicatorWhitespace, 0);
 825         sc.Complete();
 826 }
 827
 828 static bool IsCommentLine(Sci_Position line, Accessor &styler) {
 829         const Sci_Position pos = styler.LineStart(line);
 830         const Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
 831         for (Sci_Position i = pos; i < eol_pos; i++) {
 832                 const char ch = styler[i];
 833                 if (ch == '#')
 834                         return true;
 835                 else if (ch != ' ' && ch != '\t')
 836                         return false;
 837         }
 838         return false;
 839 }
 840
 841 static bool IsQuoteLine(Sci_Position line, const Accessor &styler) {
 842         const int style = styler.StyleAt(styler.LineStart(line)) & 31;
 843         return IsPyTripleQuoteStringState(style);
 844 }
 845
 846
 847 void SCI_METHOD LexerPython::Fold(Sci_PositionU startPos, Sci_Position length, int /*initStyle - unused*/, IDocument *pAccess) {
             // Sets the fold level of each line in the requested range through
             // styler.SetLevel, deriving the levels from styler.IndentAmount.
             //   startPos, length - range of the document to fold; processing actually
             //                      starts at an earlier line (see the backtracking below).
             //   pAccess          - document interface wrapped by the Accessor.
             // Blank and comment lines that are skipped over get their level assigned
             // afterwards from the code around them.  When options.foldQuotes is set,
             // the line before a triple quoted string run becomes a fold header and the
             // string lines are placed one level deeper.
             // Nothing is done when options.fold is off.
 848         if (!options.fold)
 849                 return;
 850
 851         Accessor styler(pAccess, nullptr);
 852
             // If the range reaches the end of the document use the line holding that
             // position, otherwise the line holding the last position inside the range.
 853         const Sci_Position maxPos = startPos + length;
 854         const Sci_Position maxLines = (maxPos == styler.Length()) ? styler.GetLine(maxPos) : styler.GetLine(maxPos - 1);        // Requested last line
 855         const Sci_Position docLines = styler.GetLine(styler.Length());  // Available last line
 856
 857         // Backtrack to previous non-blank line so we can determine indent level
 858         // for any white space lines (needed esp. within triple quoted strings)
 859         // and so we can fix any preceding fold level (which is why we go back
 860         // at least one line in all cases)
 861         int spaceFlags = 0;
 862         Sci_Position lineCurrent = styler.GetLine(startPos);
 863         int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, nullptr);
 864         while (lineCurrent > 0) {
 865                 lineCurrent--;
 866                 indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, nullptr);
                     // Stop at the first line that is not blank, not a comment and does
                     // not start inside a triple quoted string.
 867                 if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG) &&
 868                                 (!IsCommentLine(lineCurrent, styler)) &&
 869                                 (!IsQuoteLine(lineCurrent, styler)))
 870                         break;
 871         }
 872         int indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
 873
 874         // Set up initial loop state
 875         startPos = styler.LineStart(lineCurrent);
             // prev_state is the style of the character just before the starting line
             // (low five bits only); it stays SCE_P_DEFAULT when starting at line 0.
 876         int prev_state = SCE_P_DEFAULT & 31;
 877         if (lineCurrent >= 1)
 878                 prev_state = styler.StyleAt(startPos - 1) & 31;
 879         int prevQuote = options.foldQuotes && IsPyTripleQuoteStringState(prev_state);
 880
 881         // Process all characters to end of requested range or end of any triple quote
 882         // that hangs over the end of the range.  Cap processing in all cases
 883         // to end of document (in case of unclosed quote at end).
 884         while ((lineCurrent <= docLines) && ((lineCurrent <= maxLines) || prevQuote)) {
 885
 886                 // Gather info
 887                 int lev = indentCurrent;
 888                 Sci_Position lineNext = lineCurrent + 1;
 889                 int indentNext = indentCurrent;
 890                 int quote = false;
 891                 if (lineNext <= docLines) {
 892                         // Information about next line is only available if not at end of document
 893                         indentNext = styler.IndentAmount(lineNext, &spaceFlags, nullptr);
                             // If the next line starts at the very end of the document, take the
                             // style of the last character instead of the position past it.
 894                         const Sci_Position lookAtPos = (styler.LineStart(lineNext) == styler.Length()) ? styler.Length() - 1 : styler.LineStart(lineNext);
 895                         const int style = styler.StyleAt(lookAtPos) & 31;
 896                         quote = options.foldQuotes && IsPyTripleQuoteStringState(style);
 897                 }
                     // quote     - the next line starts in a triple quoted string style
                     // prevQuote - the same test result carried over from the previous iteration
 898                 const int quote_start = (quote && !prevQuote);
 899                 const int quote_continue = (quote && prevQuote);
                     // The reference level is only refreshed outside a continuing string, so
                     // every line of a string is measured against the level where it began.
 900                 if (!quote || !prevQuote)
 901                         indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
 902                 if (quote)
 903                         indentNext = indentCurrentLevel;
                     // A blank next line keeps its white flag but takes the current level.
 904                 if (indentNext & SC_FOLDLEVELWHITEFLAG)
 905                         indentNext = SC_FOLDLEVELWHITEFLAG | indentCurrentLevel;
 906
 907                 if (quote_start) {
 908                         // Place fold point at start of triple quoted string
 909                         lev |= SC_FOLDLEVELHEADERFLAG;
 910                 } else if (quote_continue || prevQuote) {
 911                         // Add level to rest of lines in the string
 912                         lev = lev + 1;
 913                 }
 914
 915                 // Skip past any blank lines for next indent level info; we skip also
 916                 // comments (all comments, not just those starting in column 0)
 917                 // which effectively folds them into surrounding code rather
 918                 // than screwing up folding.  If comments end file, use the min
 919                 // comment indent as the level after
 920
 921                 int minCommentLevel = indentCurrentLevel;
 922                 while (!quote &&
 923                                 (lineNext < docLines) &&
 924                                 ((indentNext & SC_FOLDLEVELWHITEFLAG) ||
 925                                  (lineNext <= docLines && IsCommentLine(lineNext, styler)))) {
 926
 927                         if (IsCommentLine(lineNext, styler) && indentNext < minCommentLevel) {
 928                                 minCommentLevel = indentNext;
 929                         }
 930
 931                         lineNext++;
 932                         indentNext = styler.IndentAmount(lineNext, &spaceFlags, nullptr);
 933                 }
 934
                     // lineNext having reached the last line means the skipping ran to the
                     // end of the document, so there is no following code line to take a
                     // level from and the smallest comment indent seen is used instead.
 935                 const int levelAfterComments = ((lineNext < docLines) ? indentNext & SC_FOLDLEVELNUMBERMASK : minCommentLevel);
 936                 const int levelBeforeComments = std::max(indentCurrentLevel, levelAfterComments);
 937
 938                 // Now set all the indent levels on the lines we skipped
 939                 // Do this from end to start.  Once we encounter one line
 940                 // which is indented more than the line after the end of
 941                 // the comment-block, use the level of the block before
 942
 943                 Sci_Position skipLine = lineNext;
 944                 int skipLevel = levelAfterComments;
 945
 946                 while (--skipLine > lineCurrent) {
 947                         const int skipLineIndent = styler.IndentAmount(skipLine, &spaceFlags, nullptr);
 948
 949                         if (options.foldCompact) {
                                     // Compact mode: any deeper-indented skipped line switches to the
                                     // level before the block, and the white flag is kept on the line.
 950                                 if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > levelAfterComments)
 951                                         skipLevel = levelBeforeComments;
 952
 953                                 const int whiteFlag = skipLineIndent & SC_FOLDLEVELWHITEFLAG;
 954
 955                                 styler.SetLevel(skipLine, skipLevel | whiteFlag);
 956                         } else {
                                     // Non-compact mode: only a deeper-indented line that is neither
                                     // blank nor a comment switches the level, and no white flag is stored.
 957                                 if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > levelAfterComments &&
 958                                                 !(skipLineIndent & SC_FOLDLEVELWHITEFLAG) &&
 959                                                 !IsCommentLine(skipLine, styler))
 960                                         skipLevel = levelBeforeComments;
 961
 962                                 styler.SetLevel(skipLine, skipLevel);
 963                         }
 964                 }
 965
 966                 // Set fold header on non-quote line
                     // (only when the line is not blank and the next code line is deeper)
 967                 if (!quote && !(indentCurrent & SC_FOLDLEVELWHITEFLAG)) {
 968                         if ((indentCurrent & SC_FOLDLEVELNUMBERMASK) < (indentNext & SC_FOLDLEVELNUMBERMASK))
 969                                 lev |= SC_FOLDLEVELHEADERFLAG;
 970                 }
 971
 972                 // Keep track of triple quote state of previous line
 973                 prevQuote = quote;
 974
 975                 // Set fold level for this line and move to next line
                     // (the white flag is stripped from the stored level unless foldCompact is on)
 976                 styler.SetLevel(lineCurrent, options.foldCompact ? lev : lev & ~SC_FOLDLEVELWHITEFLAG);
 977                 indentCurrent = indentNext;
 978                 lineCurrent = lineNext;
 979         }
 980
 981         // NOTE: Cannot set level of last line here because indentCurrent doesn't have
 982         // header flag set; the loop above is crafted to take care of this case!
 983         //styler.SetLevel(lineCurrent, indentCurrent);
 984 }
 985
 986 LexerModule lmPython(SCLEX_PYTHON, LexerPython::LexerFactoryPython, "python",  // Module object for this lexer: SCLEX_PYTHON, LexerFactoryPython, the name "python"
 987                      pythonWordListDesc);  // and the pythonWordListDesc word list descriptions