// Scintilla source code edit control
/** @file LexPython.cxx
 ** Lexer for Python.
 **/
// Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.
#include <cstring>

#include <string_view>
#include <vector>
#include <map>
#include <algorithm>

#include "Scintilla.h"

#include "StringCopy.h"
#include "LexAccessor.h"
#include "StyleContext.h"
#include "CharacterSet.h"
#include "CharacterCategory.h"
#include "LexerModule.h"
#include "OptionSet.h"
#include "SubStyles.h"
#include "DefaultLexer.h"

using namespace Scintilla;
using namespace Lexilla;
39 // Use an unnamed namespace to protect the functions and classes from name conflicts
/* Notes on f-strings: f-strings are strings prefixed with f (e.g. f'') that may
   have arbitrary expressions in {}.  The tokens in the expressions are lexed as if
   they were outside of any string.  Expressions may contain { and } characters as
   long as there is a closing } for every {, may be 2+ lines in a triple quoted
   string, and may have a formatting specifier following a ! or :, but both !
   and : are valid inside of a bracketed expression and != is a valid
   expression token even outside of a bracketed expression.

   When in an f-string expression, the lexer keeps track of the state value of
   the f-string and the nesting count for the expression (# of [, (, { seen - # of
   }, ), ] seen).  f-strings may be nested (e.g. f'{ a + f"{1+2}"') so a stack of
   states and nesting counts is kept.  If a f-string expression continues beyond
   the end of a line, this stack is saved in a std::map that maps a line number to
   the stack at the end of that line.  std::vector is used for the stack.

   The PEP for f-strings is at https://www.python.org/dev/peps/pep-0498/
*/
/** One entry of the f-string expression stack described above. */
struct SingleFStringExpState {
	int state;          // style of the f-string that contains the expression
	int nestingCount;   // open brackets seen minus close brackets seen inside the expression
};
/* Category of the most recently styled keyword.
   kwCDef, kwCTypeName only used for Cython */
enum kwType {
	kwOther,
	kwClass,
	kwDef,
	kwImport,
	kwCDef,
	kwCTypeName,
	kwCPDef
};
/* Bit flags naming the string prefix letters that are recognised;
   values are combined with | and tested with &. */
enum literalsAllowed {
	litNone = 0,
	litU = 1,
	litB = 2,
	litF = 4
};
// Indicator number passed to IndicatorFill when marking indentation
constexpr int indicatorWhitespace = 1;
70 bool IsPyComment(Accessor
&styler
, Sci_Position pos
, Sci_Position len
) {
71 return len
> 0 && styler
[pos
] == '#';
74 bool IsPyStringTypeChar(int ch
, literalsAllowed allowed
) noexcept
{
76 ((allowed
& litB
) && (ch
== 'b' || ch
== 'B')) ||
77 ((allowed
& litU
) && (ch
== 'u' || ch
== 'U')) ||
78 ((allowed
& litF
) && (ch
== 'f' || ch
== 'F'));
81 bool IsPyStringStart(int ch
, int chNext
, int chNext2
, literalsAllowed allowed
) noexcept
{
82 if (ch
== '\'' || ch
== '"')
84 if (IsPyStringTypeChar(ch
, allowed
)) {
85 if (chNext
== '"' || chNext
== '\'')
87 if ((chNext
== 'r' || chNext
== 'R') && (chNext2
== '"' || chNext2
== '\''))
90 if ((ch
== 'r' || ch
== 'R') && (chNext
== '"' || chNext
== '\''))
96 bool IsPyFStringState(int st
) noexcept
{
97 return ((st
== SCE_P_FCHARACTER
) || (st
== SCE_P_FSTRING
) ||
98 (st
== SCE_P_FTRIPLE
) || (st
== SCE_P_FTRIPLEDOUBLE
));
101 bool IsPySingleQuoteStringState(int st
) noexcept
{
102 return ((st
== SCE_P_CHARACTER
) || (st
== SCE_P_STRING
) ||
103 (st
== SCE_P_FCHARACTER
) || (st
== SCE_P_FSTRING
));
106 bool IsPyTripleQuoteStringState(int st
) noexcept
{
107 return ((st
== SCE_P_TRIPLE
) || (st
== SCE_P_TRIPLEDOUBLE
) ||
108 (st
== SCE_P_FTRIPLE
) || (st
== SCE_P_FTRIPLEDOUBLE
));
111 char GetPyStringQuoteChar(int st
) noexcept
{
112 if ((st
== SCE_P_CHARACTER
) || (st
== SCE_P_FCHARACTER
) ||
113 (st
== SCE_P_TRIPLE
) || (st
== SCE_P_FTRIPLE
))
115 if ((st
== SCE_P_STRING
) || (st
== SCE_P_FSTRING
) ||
116 (st
== SCE_P_TRIPLEDOUBLE
) || (st
== SCE_P_FTRIPLEDOUBLE
))
122 void PushStateToStack(int state
, std::vector
<SingleFStringExpState
> &stack
, SingleFStringExpState
*¤tFStringExp
) {
123 SingleFStringExpState single
= {state
, 0};
124 stack
.push_back(single
);
126 currentFStringExp
= &stack
.back();
129 int PopFromStateStack(std::vector
<SingleFStringExpState
> &stack
, SingleFStringExpState
*¤tFStringExp
) noexcept
{
132 if (!stack
.empty()) {
133 state
= stack
.back().state
;
138 currentFStringExp
= nullptr;
140 currentFStringExp
= &stack
.back();
146 /* Return the state to use for the string starting at i; *nextIndex will be set to the first index following the quote(s) */
147 int GetPyStringState(Accessor
&styler
, Sci_Position i
, Sci_PositionU
*nextIndex
, literalsAllowed allowed
) {
148 char ch
= styler
.SafeGetCharAt(i
);
149 char chNext
= styler
.SafeGetCharAt(i
+ 1);
150 const int firstIsF
= (ch
== 'f' || ch
== 'F');
152 // Advance beyond r, u, or ur prefix (or r, b, or br in Python 2.7+ and r, f, or fr in Python 3.6+), but bail if there are any unexpected chars
153 if (ch
== 'r' || ch
== 'R') {
155 ch
= styler
.SafeGetCharAt(i
);
156 chNext
= styler
.SafeGetCharAt(i
+ 1);
157 } else if (IsPyStringTypeChar(ch
, allowed
)) {
158 if (chNext
== 'r' || chNext
== 'R')
162 ch
= styler
.SafeGetCharAt(i
);
163 chNext
= styler
.SafeGetCharAt(i
+ 1);
166 if (ch
!= '"' && ch
!= '\'') {
168 return SCE_P_DEFAULT
;
171 if (ch
== chNext
&& ch
== styler
.SafeGetCharAt(i
+ 2)) {
175 return (firstIsF
? SCE_P_FTRIPLEDOUBLE
: SCE_P_TRIPLEDOUBLE
);
177 return (firstIsF
? SCE_P_FTRIPLE
: SCE_P_TRIPLE
);
182 return (firstIsF
? SCE_P_FSTRING
: SCE_P_STRING
);
184 return (firstIsF
? SCE_P_FCHARACTER
: SCE_P_CHARACTER
);
188 inline bool IsAWordChar(int ch
, bool unicodeIdentifiers
) {
190 return (IsAlphaNumeric(ch
) || ch
== '.' || ch
== '_');
192 if (!unicodeIdentifiers
)
195 // Python uses the XID_Continue set from Unicode data
196 return IsXidContinue(ch
);
199 inline bool IsAWordStart(int ch
, bool unicodeIdentifiers
) {
201 return (IsUpperOrLowerCase(ch
) || ch
== '_');
203 if (!unicodeIdentifiers
)
206 // Python uses the XID_Start set from Unicode data
207 return IsXidStart(ch
);
210 bool IsFirstNonWhitespace(Sci_Position pos
, Accessor
&styler
) {
211 const Sci_Position line
= styler
.GetLine(pos
);
212 const Sci_Position start_pos
= styler
.LineStart(line
);
213 for (Sci_Position i
= start_pos
; i
< pos
; i
++) {
214 const char ch
= styler
[i
];
215 if (!(ch
== ' ' || ch
== '\t'))
221 // Options used for LexerPython
222 struct OptionsPython
{
224 bool base2or8Literals
;
228 bool stringsOverNewline
;
229 bool keywords2NoSubIdentifiers
;
233 bool unicodeIdentifiers
;
237 base2or8Literals
= true;
241 stringsOverNewline
= false;
242 keywords2NoSubIdentifiers
= false;
246 unicodeIdentifiers
= true;
249 literalsAllowed
AllowedLiterals() const noexcept
{
250 literalsAllowed allowedLiterals
= stringsU
? litU
: litNone
;
252 allowedLiterals
= static_cast<literalsAllowed
>(allowedLiterals
| litB
);
254 allowedLiterals
= static_cast<literalsAllowed
>(allowedLiterals
| litF
);
255 return allowedLiterals
;
259 const char *const pythonWordListDesc
[] = {
261 "Highlighted identifiers",
265 struct OptionSetPython
: public OptionSet
<OptionsPython
> {
267 DefineProperty("tab.timmy.whinge.level", &OptionsPython::whingeLevel
,
268 "For Python code, checks whether indenting is consistent. "
269 "The default, 0 turns off indentation checking, "
270 "1 checks whether each line is potentially inconsistent with the previous line, "
271 "2 checks whether any space characters occur before a tab character in the indentation, "
272 "3 checks whether any spaces are in the indentation, and "
273 "4 checks for any tab characters in the indentation. "
274 "1 is a good level to use.");
276 DefineProperty("lexer.python.literals.binary", &OptionsPython::base2or8Literals
,
277 "Set to 0 to not recognise Python 3 binary and octal literals: 0b1011 0o712.");
279 DefineProperty("lexer.python.strings.u", &OptionsPython::stringsU
,
280 "Set to 0 to not recognise Python Unicode literals u\"x\" as used before Python 3.");
282 DefineProperty("lexer.python.strings.b", &OptionsPython::stringsB
,
283 "Set to 0 to not recognise Python 3 bytes literals b\"x\".");
285 DefineProperty("lexer.python.strings.f", &OptionsPython::stringsF
,
286 "Set to 0 to not recognise Python 3.6 f-string literals f\"var={var}\".");
288 DefineProperty("lexer.python.strings.over.newline", &OptionsPython::stringsOverNewline
,
289 "Set to 1 to allow strings to span newline characters.");
291 DefineProperty("lexer.python.keywords2.no.sub.identifiers", &OptionsPython::keywords2NoSubIdentifiers
,
292 "When enabled, it will not style keywords2 items that are used as a sub-identifier. "
293 "Example: when set, will not highlight \"foo.open\" when \"open\" is a keywords2 item.");
295 DefineProperty("fold", &OptionsPython::fold
);
297 DefineProperty("fold.quotes.python", &OptionsPython::foldQuotes
,
298 "This option enables folding multi-line quoted strings when using the Python lexer.");
300 DefineProperty("fold.compact", &OptionsPython::foldCompact
);
302 DefineProperty("lexer.python.unicode.identifiers", &OptionsPython::unicodeIdentifiers
,
303 "Set to 0 to not recognise Python 3 Unicode identifiers.");
305 DefineWordListSets(pythonWordListDesc
);
309 const char styleSubable
[] = { SCE_P_IDENTIFIER
, 0 };
311 LexicalClass lexicalClasses
[] = {
312 // Lexer Python SCLEX_PYTHON SCE_P_:
313 0, "SCE_P_DEFAULT", "default", "White space",
314 1, "SCE_P_COMMENTLINE", "comment line", "Comment",
315 2, "SCE_P_NUMBER", "literal numeric", "Number",
316 3, "SCE_P_STRING", "literal string", "String",
317 4, "SCE_P_CHARACTER", "literal string", "Single quoted string",
318 5, "SCE_P_WORD", "keyword", "Keyword",
319 6, "SCE_P_TRIPLE", "literal string", "Triple quotes",
320 7, "SCE_P_TRIPLEDOUBLE", "literal string", "Triple double quotes",
321 8, "SCE_P_CLASSNAME", "identifier", "Class name definition",
322 9, "SCE_P_DEFNAME", "identifier", "Function or method name definition",
323 10, "SCE_P_OPERATOR", "operator", "Operators",
324 11, "SCE_P_IDENTIFIER", "identifier", "Identifiers",
325 12, "SCE_P_COMMENTBLOCK", "comment", "Comment-blocks",
326 13, "SCE_P_STRINGEOL", "error literal string", "End of line where string is not closed",
327 14, "SCE_P_WORD2", "identifier", "Highlighted identifiers",
328 15, "SCE_P_DECORATOR", "preprocessor", "Decorators",
329 16, "SCE_P_FSTRING", "literal string interpolated", "F-String",
330 17, "SCE_P_FCHARACTER", "literal string interpolated", "Single quoted f-string",
331 18, "SCE_P_FTRIPLE", "literal string interpolated", "Triple quoted f-string",
332 19, "SCE_P_FTRIPLEDOUBLE", "literal string interpolated", "Triple double quoted f-string",
337 class LexerPython
: public DefaultLexer
{
340 OptionsPython options
;
341 OptionSetPython osPython
;
342 enum { ssIdentifier
};
344 std::map
<Sci_Position
, std::vector
<SingleFStringExpState
> > ftripleStateAtEol
;
346 explicit LexerPython() :
347 DefaultLexer("python", SCLEX_PYTHON
, lexicalClasses
, ELEMENTS(lexicalClasses
)),
348 subStyles(styleSubable
, 0x80, 0x40, 0) {
350 ~LexerPython() override
{
352 void SCI_METHOD
Release() override
{
355 int SCI_METHOD
Version() const override
{
358 const char *SCI_METHOD
PropertyNames() override
{
359 return osPython
.PropertyNames();
361 int SCI_METHOD
PropertyType(const char *name
) override
{
362 return osPython
.PropertyType(name
);
364 const char *SCI_METHOD
DescribeProperty(const char *name
) override
{
365 return osPython
.DescribeProperty(name
);
367 Sci_Position SCI_METHOD
PropertySet(const char *key
, const char *val
) override
;
368 const char * SCI_METHOD
PropertyGet(const char *key
) override
{
369 return osPython
.PropertyGet(key
);
371 const char *SCI_METHOD
DescribeWordListSets() override
{
372 return osPython
.DescribeWordListSets();
374 Sci_Position SCI_METHOD
WordListSet(int n
, const char *wl
) override
;
375 void SCI_METHOD
Lex(Sci_PositionU startPos
, Sci_Position length
, int initStyle
, IDocument
*pAccess
) override
;
376 void SCI_METHOD
Fold(Sci_PositionU startPos
, Sci_Position length
, int initStyle
, IDocument
*pAccess
) override
;
378 void *SCI_METHOD
PrivateCall(int, void *) override
{
382 int SCI_METHOD
LineEndTypesSupported() override
{
383 return SC_LINE_END_TYPE_UNICODE
;
386 int SCI_METHOD
AllocateSubStyles(int styleBase
, int numberStyles
) override
{
387 return subStyles
.Allocate(styleBase
, numberStyles
);
389 int SCI_METHOD
SubStylesStart(int styleBase
) override
{
390 return subStyles
.Start(styleBase
);
392 int SCI_METHOD
SubStylesLength(int styleBase
) override
{
393 return subStyles
.Length(styleBase
);
395 int SCI_METHOD
StyleFromSubStyle(int subStyle
) override
{
396 const int styleBase
= subStyles
.BaseStyle(subStyle
);
399 int SCI_METHOD
PrimaryStyleFromStyle(int style
) override
{
402 void SCI_METHOD
FreeSubStyles() override
{
405 void SCI_METHOD
SetIdentifiers(int style
, const char *identifiers
) override
{
406 subStyles
.SetIdentifiers(style
, identifiers
);
408 int SCI_METHOD
DistanceToSecondaryStyles() override
{
411 const char *SCI_METHOD
GetSubStyleBases() override
{
415 static ILexer5
*LexerFactoryPython() {
416 return new LexerPython();
420 void ProcessLineEnd(StyleContext
&sc
, std::vector
<SingleFStringExpState
> &fstringStateStack
, SingleFStringExpState
*¤tFStringExp
, bool &inContinuedString
);
423 Sci_Position SCI_METHOD
LexerPython::PropertySet(const char *key
, const char *val
) {
424 if (osPython
.PropertySet(&options
, key
, val
)) {
430 Sci_Position SCI_METHOD
LexerPython::WordListSet(int n
, const char *wl
) {
431 WordList
*wordListN
= nullptr;
434 wordListN
= &keywords
;
437 wordListN
= &keywords2
;
440 Sci_Position firstModification
= -1;
444 if (*wordListN
!= wlNew
) {
446 firstModification
= 0;
449 return firstModification
;
452 void LexerPython::ProcessLineEnd(StyleContext
&sc
, std::vector
<SingleFStringExpState
> &fstringStateStack
, SingleFStringExpState
*¤tFStringExp
, bool &inContinuedString
) {
453 long deepestSingleStateIndex
= -1;
456 // Find the deepest single quote state because that string will end; no \ continuation in f-string
457 for (i
= 0; i
< fstringStateStack
.size(); i
++) {
458 if (IsPySingleQuoteStringState(fstringStateStack
[i
].state
)) {
459 deepestSingleStateIndex
= i
;
464 if (deepestSingleStateIndex
!= -1) {
465 sc
.SetState(fstringStateStack
[deepestSingleStateIndex
].state
);
466 while (fstringStateStack
.size() > static_cast<unsigned long>(deepestSingleStateIndex
)) {
467 PopFromStateStack(fstringStateStack
, currentFStringExp
);
470 if (!fstringStateStack
.empty()) {
471 std::pair
<Sci_Position
, std::vector
<SingleFStringExpState
> > val
;
472 val
.first
= sc
.currentLine
;
473 val
.second
= fstringStateStack
;
475 ftripleStateAtEol
.insert(val
);
478 if ((sc
.state
== SCE_P_DEFAULT
)
479 || IsPyTripleQuoteStringState(sc
.state
)) {
480 // Perform colourisation of white space and triple quoted strings at end of each line to allow
481 // tab marking to work inside white space and triple quoted strings
482 sc
.SetState(sc
.state
);
484 if (IsPySingleQuoteStringState(sc
.state
)) {
485 if (inContinuedString
|| options
.stringsOverNewline
) {
486 inContinuedString
= false;
488 sc
.ChangeState(SCE_P_STRINGEOL
);
489 sc
.ForwardSetState(SCE_P_DEFAULT
);
494 void SCI_METHOD
LexerPython::Lex(Sci_PositionU startPos
, Sci_Position length
, int initStyle
, IDocument
*pAccess
) {
495 Accessor
styler(pAccess
, nullptr);
497 // Track whether in f-string expression; vector is used for a stack to
498 // handle nested f-strings such as f"""{f'''{f"{f'{1}'}"}'''}"""
499 std::vector
<SingleFStringExpState
> fstringStateStack
;
500 SingleFStringExpState
*currentFStringExp
= nullptr;
502 const Sci_Position endPos
= startPos
+ length
;
504 // Backtrack to previous line in case need to fix its tab whinging
505 Sci_Position lineCurrent
= styler
.GetLine(startPos
);
507 if (lineCurrent
> 0) {
509 // Look for backslash-continued lines
510 while (lineCurrent
> 0) {
511 const Sci_Position eolPos
= styler
.LineStart(lineCurrent
) - 1;
512 const int eolStyle
= styler
.StyleAt(eolPos
);
513 if (eolStyle
== SCE_P_STRING
514 || eolStyle
== SCE_P_CHARACTER
515 || eolStyle
== SCE_P_STRINGEOL
) {
521 startPos
= styler
.LineStart(lineCurrent
);
523 initStyle
= startPos
== 0 ? SCE_P_DEFAULT
: styler
.StyleAt(startPos
- 1);
526 const literalsAllowed allowedLiterals
= options
.AllowedLiterals();
528 initStyle
= initStyle
& 31;
529 if (initStyle
== SCE_P_STRINGEOL
) {
530 initStyle
= SCE_P_DEFAULT
;
533 // Set up fstate stack from last line and remove any subsequent ftriple at eol states
534 std::map
<Sci_Position
, std::vector
<SingleFStringExpState
> >::iterator it
;
535 it
= ftripleStateAtEol
.find(lineCurrent
- 1);
536 if (it
!= ftripleStateAtEol
.end() && !it
->second
.empty()) {
537 fstringStateStack
= it
->second
;
538 currentFStringExp
= &fstringStateStack
.back();
540 it
= ftripleStateAtEol
.lower_bound(lineCurrent
);
541 if (it
!= ftripleStateAtEol
.end()) {
542 ftripleStateAtEol
.erase(it
, ftripleStateAtEol
.end());
545 kwType kwLast
= kwOther
;
547 styler
.IndentAmount(lineCurrent
, &spaceFlags
, IsPyComment
);
548 bool base_n_number
= false;
550 const WordClassifier
&classifierIdentifiers
= subStyles
.Classifier(SCE_P_IDENTIFIER
);
552 StyleContext
sc(startPos
, endPos
- startPos
, initStyle
, styler
);
554 bool indentGood
= true;
555 Sci_Position startIndicator
= sc
.currentPos
;
556 bool inContinuedString
= false;
558 for (; sc
.More(); sc
.Forward()) {
560 if (sc
.atLineStart
) {
561 styler
.IndentAmount(lineCurrent
, &spaceFlags
, IsPyComment
);
563 if (options
.whingeLevel
== 1) {
564 indentGood
= (spaceFlags
& wsInconsistent
) == 0;
565 } else if (options
.whingeLevel
== 2) {
566 indentGood
= (spaceFlags
& wsSpaceTab
) == 0;
567 } else if (options
.whingeLevel
== 3) {
568 indentGood
= (spaceFlags
& wsSpace
) == 0;
569 } else if (options
.whingeLevel
== 4) {
570 indentGood
= (spaceFlags
& wsTab
) == 0;
573 styler
.IndicatorFill(startIndicator
, sc
.currentPos
, indicatorWhitespace
, 0);
574 startIndicator
= sc
.currentPos
;
579 ProcessLineEnd(sc
, fstringStateStack
, currentFStringExp
, inContinuedString
);
585 bool needEOLCheck
= false;
588 if (sc
.state
== SCE_P_OPERATOR
) {
590 sc
.SetState(SCE_P_DEFAULT
);
591 } else if (sc
.state
== SCE_P_NUMBER
) {
592 if (!IsAWordChar(sc
.ch
, false) &&
593 !(!base_n_number
&& ((sc
.ch
== '+' || sc
.ch
== '-') && (sc
.chPrev
== 'e' || sc
.chPrev
== 'E')))) {
594 sc
.SetState(SCE_P_DEFAULT
);
596 } else if (sc
.state
== SCE_P_IDENTIFIER
) {
597 if ((sc
.ch
== '.') || (!IsAWordChar(sc
.ch
, options
.unicodeIdentifiers
))) {
599 sc
.GetCurrent(s
, sizeof(s
));
600 int style
= SCE_P_IDENTIFIER
;
601 if ((kwLast
== kwImport
) && (strcmp(s
, "as") == 0)) {
603 } else if (keywords
.InList(s
)) {
605 } else if (kwLast
== kwClass
) {
606 style
= SCE_P_CLASSNAME
;
607 } else if (kwLast
== kwDef
) {
608 style
= SCE_P_DEFNAME
;
609 } else if (kwLast
== kwCDef
|| kwLast
== kwCPDef
) {
610 Sci_Position pos
= sc
.currentPos
;
611 unsigned char ch
= styler
.SafeGetCharAt(pos
, '\0');
614 style
= SCE_P_DEFNAME
;
616 } else if (ch
== ':') {
617 style
= SCE_P_CLASSNAME
;
619 } else if (ch
== ' ' || ch
== '\t' || ch
== '\n' || ch
== '\r') {
621 ch
= styler
.SafeGetCharAt(pos
, '\0');
626 } else if (keywords2
.InList(s
)) {
627 if (options
.keywords2NoSubIdentifiers
) {
628 // We don't want to highlight keywords2
629 // that are used as a sub-identifier,
630 // i.e. not open in "foo.open".
631 const Sci_Position pos
= styler
.GetStartSegment() - 1;
632 if (pos
< 0 || (styler
.SafeGetCharAt(pos
, '\0') != '.'))
638 int subStyle
= classifierIdentifiers
.ValueFor(s
);
643 sc
.ChangeState(style
);
644 sc
.SetState(SCE_P_DEFAULT
);
645 if (style
== SCE_P_WORD
) {
646 if (0 == strcmp(s
, "class"))
648 else if (0 == strcmp(s
, "def"))
650 else if (0 == strcmp(s
, "import"))
652 else if (0 == strcmp(s
, "cdef"))
654 else if (0 == strcmp(s
, "cpdef"))
656 else if (0 == strcmp(s
, "cimport"))
658 else if (kwLast
!= kwCDef
&& kwLast
!= kwCPDef
)
660 } else if (kwLast
!= kwCDef
&& kwLast
!= kwCPDef
) {
664 } else if ((sc
.state
== SCE_P_COMMENTLINE
) || (sc
.state
== SCE_P_COMMENTBLOCK
)) {
665 if (sc
.ch
== '\r' || sc
.ch
== '\n') {
666 sc
.SetState(SCE_P_DEFAULT
);
668 } else if (sc
.state
== SCE_P_DECORATOR
) {
669 if (!IsAWordStart(sc
.ch
, options
.unicodeIdentifiers
)) {
670 sc
.SetState(SCE_P_DEFAULT
);
672 } else if (IsPySingleQuoteStringState(sc
.state
)) {
674 if ((sc
.chNext
== '\r') && (sc
.GetRelative(2) == '\n')) {
677 if (sc
.chNext
== '\n' || sc
.chNext
== '\r') {
678 inContinuedString
= true;
680 // Don't roll over the newline.
683 } else if (sc
.ch
== GetPyStringQuoteChar(sc
.state
)) {
684 sc
.ForwardSetState(SCE_P_DEFAULT
);
687 } else if ((sc
.state
== SCE_P_TRIPLE
) || (sc
.state
== SCE_P_FTRIPLE
)) {
690 } else if (sc
.Match(R
"(''')")) {
693 sc
.ForwardSetState(SCE_P_DEFAULT
);
696 } else if ((sc
.state
== SCE_P_TRIPLEDOUBLE
) || (sc
.state
== SCE_P_FTRIPLEDOUBLE
)) {
699 } else if (sc
.Match(R
"(""")")) {
702 sc.ForwardSetState(SCE_P_DEFAULT);
707 // Note if used and not if else because string states also match
708 // some of the above clauses
709 if (IsPyFStringState(sc.state) && sc.ch == '{') {
710 if (sc.chNext == '{') {
713 PushStateToStack(sc.state, fstringStateStack, currentFStringExp);
714 sc.ForwardSetState(SCE_P_DEFAULT);
719 // If in an f-string expression, check for the ending quote(s)
720 // and end f-string to handle syntactically incorrect cases like
722 if (!fstringStateStack.empty() && (sc.ch == '\'' || sc.ch == '"')) {
723 long matching_stack_i = -1;
724 for (unsigned long stack_i = 0; stack_i < fstringStateStack.size() && matching_stack_i == -1; stack_i++) {
725 const int stack_state = fstringStateStack[stack_i].state;
726 const char quote = GetPyStringQuoteChar(stack_state);
727 if (sc.ch == quote) {
728 if (IsPySingleQuoteStringState(stack_state)) {
729 matching_stack_i = stack_i;
730 } else if (quote == '"' ? sc.Match(R"(""")") : sc
.Match("'''")) {
731 matching_stack_i
= stack_i
;
736 if (matching_stack_i
!= -1) {
737 sc
.SetState(fstringStateStack
[matching_stack_i
].state
);
738 if (IsPyTripleQuoteStringState(fstringStateStack
[matching_stack_i
].state
)) {
742 sc
.ForwardSetState(SCE_P_DEFAULT
);
745 while (fstringStateStack
.size() > static_cast<unsigned long>(matching_stack_i
)) {
746 PopFromStateStack(fstringStateStack
, currentFStringExp
);
750 // End of code to find the end of a state
752 if (!indentGood
&& !IsASpaceOrTab(sc
.ch
)) {
753 styler
.IndicatorFill(startIndicator
, sc
.currentPos
, indicatorWhitespace
, 1);
754 startIndicator
= sc
.currentPos
;
758 // One cdef or cpdef line, clear kwLast only at end of line
759 if ((kwLast
== kwCDef
|| kwLast
== kwCPDef
) && sc
.atLineEnd
) {
763 // State exit code may have moved on to end of line
764 if (needEOLCheck
&& sc
.atLineEnd
) {
765 ProcessLineEnd(sc
, fstringStateStack
, currentFStringExp
, inContinuedString
);
767 styler
.IndentAmount(lineCurrent
, &spaceFlags
, IsPyComment
);
772 // If in f-string expression, check for }, :, ! to resume f-string state or update nesting count
773 if (currentFStringExp
&& !IsPySingleQuoteStringState(sc
.state
) && !IsPyTripleQuoteStringState(sc
.state
)) {
774 if (currentFStringExp
->nestingCount
== 0 && (sc
.ch
== '}' || sc
.ch
== ':' || (sc
.ch
== '!' && sc
.chNext
!= '='))) {
775 sc
.SetState(PopFromStateStack(fstringStateStack
, currentFStringExp
));
777 if (sc
.ch
== '{' || sc
.ch
== '[' || sc
.ch
== '(') {
778 currentFStringExp
->nestingCount
++;
779 } else if (sc
.ch
== '}' || sc
.ch
== ']' || sc
.ch
== ')') {
780 currentFStringExp
->nestingCount
--;
785 // Check for a new state starting character
786 if (sc
.state
== SCE_P_DEFAULT
) {
787 if (IsADigit(sc
.ch
) || (sc
.ch
== '.' && IsADigit(sc
.chNext
))) {
788 if (sc
.ch
== '0' && (sc
.chNext
== 'x' || sc
.chNext
== 'X')) {
789 base_n_number
= true;
790 sc
.SetState(SCE_P_NUMBER
);
791 } else if (sc
.ch
== '0' &&
792 (sc
.chNext
== 'o' || sc
.chNext
== 'O' || sc
.chNext
== 'b' || sc
.chNext
== 'B')) {
793 if (options
.base2or8Literals
) {
794 base_n_number
= true;
795 sc
.SetState(SCE_P_NUMBER
);
797 sc
.SetState(SCE_P_NUMBER
);
798 sc
.ForwardSetState(SCE_P_IDENTIFIER
);
801 base_n_number
= false;
802 sc
.SetState(SCE_P_NUMBER
);
804 } else if (isoperator(sc
.ch
) || sc
.ch
== '`') {
805 sc
.SetState(SCE_P_OPERATOR
);
806 } else if (sc
.ch
== '#') {
807 sc
.SetState(sc
.chNext
== '#' ? SCE_P_COMMENTBLOCK
: SCE_P_COMMENTLINE
);
808 } else if (sc
.ch
== '@') {
809 if (IsFirstNonWhitespace(sc
.currentPos
, styler
))
810 sc
.SetState(SCE_P_DECORATOR
);
812 sc
.SetState(SCE_P_OPERATOR
);
813 } else if (IsPyStringStart(sc
.ch
, sc
.chNext
, sc
.GetRelative(2), allowedLiterals
)) {
814 Sci_PositionU nextIndex
= 0;
815 sc
.SetState(GetPyStringState(styler
, sc
.currentPos
, &nextIndex
, allowedLiterals
));
816 while (nextIndex
> (sc
.currentPos
+ 1) && sc
.More()) {
819 } else if (IsAWordStart(sc
.ch
, options
.unicodeIdentifiers
)) {
820 sc
.SetState(SCE_P_IDENTIFIER
);
824 styler
.IndicatorFill(startIndicator
, sc
.currentPos
, indicatorWhitespace
, 0);
828 static bool IsCommentLine(Sci_Position line
, Accessor
&styler
) {
829 const Sci_Position pos
= styler
.LineStart(line
);
830 const Sci_Position eol_pos
= styler
.LineStart(line
+ 1) - 1;
831 for (Sci_Position i
= pos
; i
< eol_pos
; i
++) {
832 const char ch
= styler
[i
];
835 else if (ch
!= ' ' && ch
!= '\t')
841 static bool IsQuoteLine(Sci_Position line
, const Accessor
&styler
) {
842 const int style
= styler
.StyleAt(styler
.LineStart(line
)) & 31;
843 return IsPyTripleQuoteStringState(style
);
847 void SCI_METHOD
LexerPython::Fold(Sci_PositionU startPos
, Sci_Position length
, int /*initStyle - unused*/, IDocument
*pAccess
) {
851 Accessor
styler(pAccess
, nullptr);
853 const Sci_Position maxPos
= startPos
+ length
;
854 const Sci_Position maxLines
= (maxPos
== styler
.Length()) ? styler
.GetLine(maxPos
) : styler
.GetLine(maxPos
- 1); // Requested last line
855 const Sci_Position docLines
= styler
.GetLine(styler
.Length()); // Available last line
857 // Backtrack to previous non-blank line so we can determine indent level
858 // for any white space lines (needed esp. within triple quoted strings)
859 // and so we can fix any preceding fold level (which is why we go back
860 // at least one line in all cases)
862 Sci_Position lineCurrent
= styler
.GetLine(startPos
);
863 int indentCurrent
= styler
.IndentAmount(lineCurrent
, &spaceFlags
, nullptr);
864 while (lineCurrent
> 0) {
866 indentCurrent
= styler
.IndentAmount(lineCurrent
, &spaceFlags
, nullptr);
867 if (!(indentCurrent
& SC_FOLDLEVELWHITEFLAG
) &&
868 (!IsCommentLine(lineCurrent
, styler
)) &&
869 (!IsQuoteLine(lineCurrent
, styler
)))
872 int indentCurrentLevel
= indentCurrent
& SC_FOLDLEVELNUMBERMASK
;
874 // Set up initial loop state
875 startPos
= styler
.LineStart(lineCurrent
);
876 int prev_state
= SCE_P_DEFAULT
& 31;
877 if (lineCurrent
>= 1)
878 prev_state
= styler
.StyleAt(startPos
- 1) & 31;
879 int prevQuote
= options
.foldQuotes
&& IsPyTripleQuoteStringState(prev_state
);
881 // Process all characters to end of requested range or end of any triple quote
882 //that hangs over the end of the range. Cap processing in all cases
883 // to end of document (in case of unclosed quote at end).
884 while ((lineCurrent
<= docLines
) && ((lineCurrent
<= maxLines
) || prevQuote
)) {
887 int lev
= indentCurrent
;
888 Sci_Position lineNext
= lineCurrent
+ 1;
889 int indentNext
= indentCurrent
;
891 if (lineNext
<= docLines
) {
892 // Information about next line is only available if not at end of document
893 indentNext
= styler
.IndentAmount(lineNext
, &spaceFlags
, nullptr);
894 const Sci_Position lookAtPos
= (styler
.LineStart(lineNext
) == styler
.Length()) ? styler
.Length() - 1 : styler
.LineStart(lineNext
);
895 const int style
= styler
.StyleAt(lookAtPos
) & 31;
896 quote
= options
.foldQuotes
&& IsPyTripleQuoteStringState(style
);
898 const int quote_start
= (quote
&& !prevQuote
);
899 const int quote_continue
= (quote
&& prevQuote
);
900 if (!quote
|| !prevQuote
)
901 indentCurrentLevel
= indentCurrent
& SC_FOLDLEVELNUMBERMASK
;
903 indentNext
= indentCurrentLevel
;
904 if (indentNext
& SC_FOLDLEVELWHITEFLAG
)
905 indentNext
= SC_FOLDLEVELWHITEFLAG
| indentCurrentLevel
;
908 // Place fold point at start of triple quoted string
909 lev
|= SC_FOLDLEVELHEADERFLAG
;
910 } else if (quote_continue
|| prevQuote
) {
911 // Add level to rest of lines in the string
915 // Skip past any blank lines for next indent level info; we skip also
916 // comments (all comments, not just those starting in column 0)
917 // which effectively folds them into surrounding code rather
918 // than screwing up folding. If comments end file, use the min
919 // comment indent as the level after
921 int minCommentLevel
= indentCurrentLevel
;
923 (lineNext
< docLines
) &&
924 ((indentNext
& SC_FOLDLEVELWHITEFLAG
) ||
925 (lineNext
<= docLines
&& IsCommentLine(lineNext
, styler
)))) {
927 if (IsCommentLine(lineNext
, styler
) && indentNext
< minCommentLevel
) {
928 minCommentLevel
= indentNext
;
932 indentNext
= styler
.IndentAmount(lineNext
, &spaceFlags
, nullptr);
935 const int levelAfterComments
= ((lineNext
< docLines
) ? indentNext
& SC_FOLDLEVELNUMBERMASK
: minCommentLevel
);
936 const int levelBeforeComments
= std::max(indentCurrentLevel
, levelAfterComments
);
938 // Now set all the indent levels on the lines we skipped
939 // Do this from end to start. Once we encounter one line
940 // which is indented more than the line after the end of
941 // the comment-block, use the level of the block before
943 Sci_Position skipLine
= lineNext
;
944 int skipLevel
= levelAfterComments
;
946 while (--skipLine
> lineCurrent
) {
947 const int skipLineIndent
= styler
.IndentAmount(skipLine
, &spaceFlags
, nullptr);
949 if (options
.foldCompact
) {
950 if ((skipLineIndent
& SC_FOLDLEVELNUMBERMASK
) > levelAfterComments
)
951 skipLevel
= levelBeforeComments
;
953 const int whiteFlag
= skipLineIndent
& SC_FOLDLEVELWHITEFLAG
;
955 styler
.SetLevel(skipLine
, skipLevel
| whiteFlag
);
957 if ((skipLineIndent
& SC_FOLDLEVELNUMBERMASK
) > levelAfterComments
&&
958 !(skipLineIndent
& SC_FOLDLEVELWHITEFLAG
) &&
959 !IsCommentLine(skipLine
, styler
))
960 skipLevel
= levelBeforeComments
;
962 styler
.SetLevel(skipLine
, skipLevel
);
966 // Set fold header on non-quote line
967 if (!quote
&& !(indentCurrent
& SC_FOLDLEVELWHITEFLAG
)) {
968 if ((indentCurrent
& SC_FOLDLEVELNUMBERMASK
) < (indentNext
& SC_FOLDLEVELNUMBERMASK
))
969 lev
|= SC_FOLDLEVELHEADERFLAG
;
972 // Keep track of triple quote state of previous line
975 // Set fold level for this line and move to next line
976 styler
.SetLevel(lineCurrent
, options
.foldCompact
? lev
: lev
& ~SC_FOLDLEVELWHITEFLAG
);
977 indentCurrent
= indentNext
;
978 lineCurrent
= lineNext
;
981 // NOTE: Cannot set level of last line here because indentCurrent doesn't have
982 // header flag set; the loop above is crafted to take care of this case!
983 //styler.SetLevel(lineCurrent, indentCurrent);
986 LexerModule
lmPython(SCLEX_PYTHON
, LexerPython::LexerFactoryPython
, "python",