2 * $Id: python.c 752 2010-02-27 17:52:46Z elliotth $
4 * Copyright (c) 2000-2003, Darren Hiebert
6 * This source code is released for free distribution under the terms of the
7 * GNU General Public License.
9 * This module contains functions for generating tags for Python language
15 #include "general.h" /* must always come first */
30 typedef struct NestingLevel NestingLevel
;
31 typedef struct NestingLevels NestingLevels
;
43 int n
; /* number of levels in use */
48 K_CLASS
, K_FUNCTION
, K_MEMBER
, K_VARIABLE
, K_IMPORT
54 static kindOption PythonKinds
[] = {
55 {TRUE
, 'c', "class", "classes"},
56 {TRUE
, 'f', "function", "functions"},
57 {TRUE
, 'm', "member", "class members"},
58 {TRUE
, 'v', "variable", "variables"},
59 {TRUE
, 'i', "namespace", "imports"}
62 static char const * const singletriple
= "'''";
63 static char const * const doubletriple
= "\"\"\"";
66 * FUNCTION DEFINITIONS
69 static NestingLevels
*nestingLevelsNew (void)
71 NestingLevels
*nls
= xCalloc (1, NestingLevels
);
75 static void nestingLevelsFree (NestingLevels
*nls
)
78 for (i
= 0; i
< nls
->allocated
; i
++)
79 vStringDelete(nls
->levels
[i
].name
);
80 if (nls
->levels
) eFree(nls
->levels
);
84 static void nestingLevelsPush (NestingLevels
*nls
,
85 const vString
*name
, int type
)
87 NestingLevel
*nl
= NULL
;
89 if (nls
->n
>= nls
->allocated
)
92 nls
->levels
= xRealloc(nls
->levels
,
93 nls
->allocated
, NestingLevel
);
94 nls
->levels
[nls
->n
].name
= vStringNew();
96 nl
= &nls
->levels
[nls
->n
];
99 vStringCopy(nl
->name
, name
);
104 static NestingLevel
*nestingLevelsGetCurrent (NestingLevels
*nls
)
106 Assert (nls
!= NULL
);
111 return &nls
->levels
[nls
->n
- 1];
114 static void nestingLevelsPop (NestingLevels
*nls
)
116 const NestingLevel
*nl
= nestingLevelsGetCurrent(nls
);
119 vStringClear(nl
->name
);
124 static boolean
isIdentifierFirstCharacter (int c
)
126 return (boolean
) (isalpha (c
) || c
== '_');
129 static boolean
isIdentifierCharacter (int c
)
131 return (boolean
) (isalnum (c
) || c
== '_');
134 /* Given a string with the contents of a line directly after the "def" keyword,
135 * extract all relevant information and create a tag.
137 static void makeFunctionTag (vString
*const function
,
138 vString
*const parent
, int is_class_parent
, const char *arglist __unused__
)
141 initTagEntry (&tag
, vStringValue (function
));
143 tag
.kindName
= "function";
145 /* tag.extensionFields.arglist = arglist; */
147 if (vStringLength (parent
) > 0)
151 tag
.kindName
= "member";
153 tag
.extensionFields
.scope
[0] = "class";
154 tag
.extensionFields
.scope
[1] = vStringValue (parent
);
158 tag
.extensionFields
.scope
[0] = "function";
159 tag
.extensionFields
.scope
[1] = vStringValue (parent
);
163 /* If a function starts with __, we mark it as file scope.
164 * FIXME: What is the proper way to signal such attributes?
165 * TODO: What does functions/classes starting with _ and __ mean in python?
167 if (strncmp (vStringValue (function
), "__", 2) == 0 &&
168 strcmp (vStringValue (function
), "__init__") != 0)
170 tag
.extensionFields
.access
= "private";
171 tag
.isFileScope
= TRUE
;
175 tag
.extensionFields
.access
= "public";
180 /* Given a string with the contents of the line directly after the "class"
181 * keyword, extract all necessary information and create a tag.
183 static void makeClassTag (vString
*const class, vString
*const inheritance
,
184 vString
*const parent
, int is_class_parent
)
187 initTagEntry (&tag
, vStringValue (class));
188 tag
.kindName
= "class";
190 if (vStringLength (parent
) > 0)
194 tag
.extensionFields
.scope
[0] = "class";
195 tag
.extensionFields
.scope
[1] = vStringValue (parent
);
199 tag
.extensionFields
.scope
[0] = "function";
200 tag
.extensionFields
.scope
[1] = vStringValue (parent
);
203 tag
.extensionFields
.inheritance
= vStringValue (inheritance
);
207 static void makeVariableTag (vString
*const var
, vString
*const parent
)
210 initTagEntry (&tag
, vStringValue (var
));
211 tag
.kindName
= "variable";
213 if (vStringLength (parent
) > 0)
215 tag
.extensionFields
.scope
[0] = "class";
216 tag
.extensionFields
.scope
[1] = vStringValue (parent
);
221 /* Skip a single or double quoted string. */
222 static const char *skipString (const char *cp
)
224 const char *start
= cp
;
226 for (cp
++; *cp
; cp
++)
230 else if (*cp
== '\\')
232 else if (*cp
== *start
)
238 /* Skip everything up to an identifier start. */
239 static const char *skipEverything (const char *cp
)
243 if (*cp
== '"' || *cp
== '\'' || *cp
== '#')
248 if (isIdentifierFirstCharacter ((int) *cp
))
254 /* Skip an identifier. */
255 static const char *skipIdentifier (const char *cp
)
257 while (isIdentifierCharacter ((int) *cp
))
262 static const char *findDefinitionOrClass (const char *cp
)
266 cp
= skipEverything (cp
);
267 if (!strncmp(cp
, "def", 3) || !strncmp(cp
, "class", 5) ||
268 !strncmp(cp
, "cdef", 4) || !strncmp(cp
, "cpdef", 5))
272 cp
= skipIdentifier (cp
);
277 static const char *skipSpace (const char *cp
)
279 while (isspace ((int) *cp
))
284 /* Starting at ''cp'', parse an identifier into ''identifier''. */
285 static const char *parseIdentifier (const char *cp
, vString
*const identifier
)
287 vStringClear (identifier
);
288 while (isIdentifierCharacter ((int) *cp
))
290 vStringPut (identifier
, (int) *cp
);
293 vStringTerminate (identifier
);
297 static void parseClass (const char *cp
, vString
*const class,
298 vString
*const parent
, int is_class_parent
)
300 vString
*const inheritance
= vStringNew ();
301 vStringClear (inheritance
);
302 cp
= parseIdentifier (cp
, class);
311 /* Closing parenthesis can be in follow up line. */
312 cp
= (const char *) fileReadLine ();
314 vStringPut (inheritance
, ' ');
317 vStringPut (inheritance
, *cp
);
320 vStringTerminate (inheritance
);
322 makeClassTag (class, inheritance
, parent
, is_class_parent
);
323 vStringDelete (inheritance
);
326 static void parseImports (const char *cp
)
329 vString
*name
, *name_next
;
331 cp
= skipEverything (cp
);
333 if ((pos
= strstr (cp
, "import")) == NULL
)
338 /* continue only if there is some space between the keyword and the identifier */
345 name
= vStringNew ();
346 name_next
= vStringNew ();
348 cp
= skipEverything (cp
);
351 cp
= parseIdentifier (cp
, name
);
353 cp
= skipEverything (cp
);
354 /* we parse the next possible import statement as well to be able to ignore 'foo' in
355 * 'import foo as bar' */
356 parseIdentifier (cp
, name_next
);
358 /* take the current tag only if the next one is not "as" */
359 if (strcmp (vStringValue (name_next
), "as") != 0 &&
360 strcmp (vStringValue (name
), "as") != 0)
362 makeSimpleTag (name
, PythonKinds
, K_IMPORT
);
365 vStringDelete (name
);
366 vStringDelete (name_next
);
369 /* modified from get.c getArglistFromStr().
370 * warning: terminates rest of string past arglist!
371 * note: does not ignore brackets inside strings! */
372 static char *parseArglist(const char *buf
)
378 if (NULL
== (start
= strchr(buf
, '(')))
380 for (level
= 1, end
= start
+ 1; level
> 0; ++end
)
384 else if ('(' == *end
)
386 else if (')' == *end
)
390 return strdup(start
);
393 static void parseFunction (const char *cp
, vString
*const def
,
394 vString
*const parent
, int is_class_parent
)
398 cp
= parseIdentifier (cp
, def
);
399 arglist
= parseArglist (cp
);
400 makeFunctionTag (def
, parent
, is_class_parent
, arglist
);
401 if (arglist
!= NULL
) {
406 /* Get the combined name of a nested symbol. Classes are separated with ".",
407 * functions with "/". For example this code:
414 * Would produce this string:
415 * MyClass.MyFunction/SubFunction/SubClass.Method
417 static boolean
constructParentString(NestingLevels
*nls
, int indent
,
421 NestingLevel
*prev
= NULL
;
422 int is_class
= FALSE
;
423 vStringClear (result
);
424 for (i
= 0; i
< nls
->n
; i
++)
426 NestingLevel
*nl
= nls
->levels
+ i
;
427 if (indent
<= nl
->indentation
)
431 vStringCatS(result
, "."); /* make Geany symbol list grouping work properly */
433 if (prev->type == K_CLASS)
434 vStringCatS(result, ".");
436 vStringCatS(result, "/");
439 vStringCat(result
, nl
->name
);
440 is_class
= (nl
->type
== K_CLASS
);
446 /* Check whether parent's indentation level is higher than the current level and
449 static void checkParent(NestingLevels
*nls
, int indent
, vString
*parent
)
454 for (i
= 0; i
< nls
->n
; i
++)
457 /* is there a better way to compare two vStrings? */
458 if (strcmp(vStringValue(parent
), vStringValue(n
->name
)) == 0)
460 if (n
&& indent
<= n
->indentation
)
462 /* remove this level by clearing its name */
463 vStringClear(n
->name
);
470 static void addNestingLevel(NestingLevels
*nls
, int indentation
,
471 const vString
*name
, boolean is_class
)
474 NestingLevel
*nl
= NULL
;
476 for (i
= 0; i
< nls
->n
; i
++)
478 nl
= nls
->levels
+ i
;
479 if (indentation
<= nl
->indentation
) break;
483 nestingLevelsPush(nls
, name
, 0);
484 nl
= nls
->levels
+ i
;
487 { /* reuse existing slot */
489 vStringCopy(nl
->name
, name
);
491 nl
->indentation
= indentation
;
492 nl
->type
= is_class
? K_CLASS
: !K_CLASS
;
495 /* Return a pointer to the start of the next triple string, or NULL. Store
496 * the kind of triple string in "which" if the return is not NULL.
498 static char const *find_triple_start(char const *string
, char const **which
)
500 char const *cp
= string
;
504 if (*cp
== '"' || *cp
== '\'')
506 if (strncmp(cp
, doubletriple
, 3) == 0)
508 *which
= doubletriple
;
511 if (strncmp(cp
, singletriple
, 3) == 0)
513 *which
= singletriple
;
523 /* Find the end of a triple string as pointed to by "which", and update "which"
524 * with any other triple strings following in the given string.
526 static void find_triple_end(char const *string
, char const **which
)
528 char const *s
= string
;
531 /* Check if the string ends in the same line. */
532 s
= strstr (s
, *which
);
536 /* If yes, check if another one starts in the same line. */
537 s
= find_triple_start(s
, which
);
543 static const char *findVariable(const char *line
)
545 /* Parse global and class variable names (C.x) from assignment statements.
546 * Object attributes (obj.x) are ignored.
547 * Assignment to a tuple 'x, y = 2, 3' not supported.
548 * TODO: ignore duplicate tags from reassignment statements. */
549 const char *cp
, *sp
, *eq
, *start
;
551 cp
= strstr(line
, "=");
558 return NULL
; /* ignore '==' operator and 'x=5,y=6)' function lines */
559 if (*eq
== '(' || *eq
== '#')
560 break; /* allow 'x = func(b=2,y=2,' lines and comments at the end of line */
564 /* go backwards to the start of the line, checking we have valid chars */
566 while (start
>= line
&& isspace ((int) *start
))
568 while (start
>= line
&& isIdentifierCharacter ((int) *start
))
570 if (!isIdentifierFirstCharacter(*(start
+ 1)))
573 while (sp
>= line
&& isspace ((int) *sp
))
575 if ((sp
+ 1) != line
) /* the line isn't a simple variable assignment */
577 /* the line is valid, parse the variable name */
582 /* Skip type declaration that optionally follows a cdef/cpdef */
583 static const char *skipTypeDecl (const char *cp
, boolean
*is_class
)
585 const char *lastStart
= cp
, *ptr
= cp
;
588 if (!strncmp("extern", ptr
, 6)) {
590 ptr
= skipSpace(ptr
);
591 if (!strncmp("from", ptr
, 4)) { return NULL
; }
593 if (!strncmp("class", ptr
, 5)) {
596 ptr
= skipSpace(ptr
);
599 /* limit so that we don't pick off "int item=obj()" */
600 while (*ptr
&& loopCount
++ < 2) {
601 while (*ptr
&& *ptr
!= '=' && *ptr
!= '(' && !isspace(*ptr
)) ptr
++;
602 if (!*ptr
|| *ptr
== '=') return NULL
;
604 return lastStart
; /* if we stopped on a '(' we are done */
606 ptr
= skipSpace(ptr
);
608 while (*lastStart
== '*') lastStart
++; /* cdef int *identifier */
613 static void findPythonTags (void)
615 vString
*const continuation
= vStringNew ();
616 vString
*const name
= vStringNew ();
617 vString
*const parent
= vStringNew();
619 NestingLevels
*const nesting_levels
= nestingLevelsNew();
623 char const *longStringLiteral
= NULL
;
625 while ((line
= (const char *) fileReadLine ()) != NULL
)
627 const char *cp
= line
, *candidate
;
628 char const *longstring
;
629 char const *keyword
, *variable
;
634 if (*cp
== '\0') /* skip blank line */
637 /* Skip comment if we are not inside a multi-line string. */
638 if (*cp
== '#' && !longStringLiteral
)
641 /* Deal with line continuation. */
642 if (!line_skip
) vStringClear(continuation
);
643 vStringCatS(continuation
, line
);
644 vStringStripTrailing(continuation
);
645 if (vStringLast(continuation
) == '\\')
647 vStringChop(continuation
);
648 vStringCatS(continuation
, " ");
652 cp
= line
= vStringValue(continuation
);
657 checkParent(nesting_levels
, indent
, parent
);
659 /* Deal with multiline string ending. */
660 if (longStringLiteral
)
662 find_triple_end(cp
, &longStringLiteral
);
666 /* Deal with multiline string start. */
667 longstring
= find_triple_start(cp
, &longStringLiteral
);
671 find_triple_end(longstring
, &longStringLiteral
);
672 /* We don't parse for any tags in the rest of the line. */
676 /* Deal with def and class keywords. */
677 keyword
= findDefinitionOrClass (cp
);
680 boolean found
= FALSE
;
681 boolean is_class
= FALSE
;
682 if (!strncmp (keyword
, "def ", 4))
684 cp
= skipSpace (keyword
+ 3);
687 else if (!strncmp (keyword
, "class ", 6))
689 cp
= skipSpace (keyword
+ 5);
693 else if (!strncmp (keyword
, "cdef ", 5))
695 cp
= skipSpace(keyword
+ 4);
696 candidate
= skipTypeDecl (cp
, &is_class
);
704 else if (!strncmp (keyword
, "cpdef ", 6))
706 cp
= skipSpace(keyword
+ 5);
707 candidate
= skipTypeDecl (cp
, &is_class
);
717 boolean is_parent_class
;
720 constructParentString(nesting_levels
, indent
, parent
);
723 parseClass (cp
, name
, parent
, is_parent_class
);
725 parseFunction(cp
, name
, parent
, is_parent_class
);
727 addNestingLevel(nesting_levels
, indent
, name
, is_class
);
730 /* Find global and class variables */
731 variable
= findVariable(line
);
734 const char *start
= variable
;
735 boolean parent_is_class
;
738 while (isIdentifierCharacter ((int) *start
))
740 vStringPut (name
, (int) *start
);
743 vStringTerminate (name
);
745 parent_is_class
= constructParentString(nesting_levels
, indent
, parent
);
746 /* skip variables in methods */
747 if (! parent_is_class
&& vStringLength(parent
) > 0)
750 makeVariableTag (name
, parent
);
752 /* Find and parse imports */
755 /* Clean up all memory we allocated. */
756 vStringDelete (parent
);
757 vStringDelete (name
);
758 vStringDelete (continuation
);
759 nestingLevelsFree (nesting_levels
);
762 extern parserDefinition
*PythonParser (void)
764 static const char *const extensions
[] = { "py", "pyx", "pxd", "pxi" ,"scons", NULL
};
765 parserDefinition
*def
= parserNew ("Python");
766 def
->kinds
= PythonKinds
;
767 def
->kindCount
= KIND_COUNT (PythonKinds
);
768 def
->extensions
= extensions
;
769 def
->parser
= findPythonTags
;
773 /* vi:set tabstop=4 shiftwidth=4: */