3 # This file is Copyright 2003, 2006, 2007, 2009, 2010 Dean Hall.
5 # This file is part of the Python-on-a-Chip program.
6 # Python-on-a-Chip is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU LESSER GENERAL PUBLIC LICENSE Version 2.1.
9 # Python-on-a-Chip is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 # A copy of the GNU LESSER GENERAL PUBLIC LICENSE Version 2.1
13 # is seen in the file COPYING up one directory from this.
18 PyCscope creates a Cscope-like index file for a tree of Python source.
25 # @brief PyCscope creates a Cscope-like index file for a tree of Python source.
28 # Improvements contributed by K. Rader of Google:
29 # - Added the `-i` argument to specify a file-list file
30 # - Fixups to the header and footer to make a valid file that cscope can read
# Module metadata.
__author__ = "Dean Hall"
__copyright__ = "Copyright 2003, 2006, 2007, 2009, 2010 Dean Hall. See LICENSE for details."
__date__ = "2007/12/25"

# Command-line help text describing the -R, -f and -i options.
# NOTE(review): the column spacing inside this string could not be recovered
# from the damaged source; confirm against the upstream file.
__usage__ = """Usage: pycscope.py [-R] [-f reffile] [-i srclistfile] [files ...]
-R Recurse directories for files.
-f reffile Use reffile as cross-ref file name instead of cscope.out.
-i srclistfile Use a file that contains a list of source files to scan."""
45 import getopt
, sys
, os
, os
.path
, string
, types
46 import keyword
, parser
, symbol
, token
# Marks as defined by Cscope.
# Each mark is a tab followed by the mark character(s); it prefixes a symbol
# in the index output.
MARK_FUNC_CALL = "\t`"
MARK_INCLUDE = "\t~<"  # TODO: assume all includes are global for now
MARK_FUNC_PARM = "\tp"
# Reverse the key,value pairs in the token dict so that a token's numeric
# code can be looked up by its name.  dict.items() is used rather than the
# Python 2-only dict.iteritems() so the table builds on Python 2 and 3 alike.
tok_name_lookup = dict(
    (name, number) for number, name in token.tok_name.items())

# Numeric codes of the tokens referenced by name elsewhere in this file.
TOK_NEWLINE = tok_name_lookup["NEWLINE"]
TOK_NAME = tok_name_lookup["NAME"]
TOK_LPAR = tok_name_lookup["LPAR"]
TOK_ENDMARKER = tok_name_lookup["ENDMARKER"]
TOK_INDENT = tok_name_lookup["INDENT"]
TOK_DEDENT = tok_name_lookup["DEDENT"]
# Reverse the key,value pairs in the symbol dict so that a grammar symbol's
# numeric code can be looked up by its name.  dict.items() is used rather
# than the Python 2-only dict.iteritems().
# NOTE(review): the `symbol` module was removed from the standard library in
# Python 3.10, so this table can only be built on older interpreters.
sym_name_lookup = dict(
    (name, number) for number, name in symbol.sym_name.items())

# Numeric codes of the grammar symbols referenced by name elsewhere in this
# file.
SYM_TRAILER = sym_name_lookup["trailer"]
SYM_VARARGSLIST = sym_name_lookup["varargslist"]
# Get the list of Python keywords and add a few common builtins.
# Work on a copy: extending keyword.kwlist itself would modify the standard
# library's list for every other user of the keyword module in this process.
kwlist = list(keyword.kwlist)
kwlist.extend(("True", "False", "None", "object"))
77 # Globals for the recursive walkAst function
88 """Parse command line args and act accordingly.
90 # Parse the command line arguments
92 opts
, args
= getopt
.getopt(sys
.argv
[1:], "Rf:i:")
93 except getopt
.GetoptError
:
97 indexfn
= "cscope.out"
104 args
.extend(map(string
.rstrip
, open(a
, 'r').readlines()))
106 # Create the buffer to store the output (list of strings)
110 # Search current dir by default
114 # Parse the given list of files/dirs
115 basepath
= os
.getcwd()
117 if os
.path
.isdir(os
.path
.join(basepath
, name
)):
118 parseDir(basepath
, name
, indexbuff
, recurse
, fnamesbuff
)
121 parseFile(basepath
, name
, indexbuff
, fnamesbuff
)
125 # Symbol data for the last file ends with a file mark
126 indexbuff
.append("\n" + MARK_FILE
)
127 writeIndex(basepath
, indexfn
, indexbuff
, fnamesbuff
)
130 def parseDir(basepath
, relpath
, indexbuff
, recurse
, fnamesbuff
):
131 """Parses all files in the directory and
132 recurses into subdirectories if requested.
134 dirpath
= os
.path
.join(basepath
, relpath
)
135 for name
in os
.listdir(dirpath
):
136 fullpath
= os
.path
.join(dirpath
, name
)
137 if os
.path
.isdir(fullpath
) and recurse
:
138 parseDir(basepath
, os
.path
.join(relpath
, name
), indexbuff
, recurse
,
142 parseFile(basepath
, os
.path
.join(relpath
, name
), indexbuff
,
148 def parseFile(basepath
, relpath
, indexbuff
, fnamesbuff
):
149 """Parses a source file and puts the resulting index into the buffer.
151 # Don't parse if it's not python source
152 if relpath
[-3:] != ".py":
155 # Open the file and get the contents
156 fullpath
= os
.path
.join(basepath
, relpath
)
157 f
= open(fullpath
, 'r')
158 filecontents
= f
.read()
161 # Add the file mark to the index
162 fnamesbuff
.append(relpath
)
163 indexbuff
.append("\n%s%s" % (MARK_FILE
, relpath
))
165 latestnewline
= len(indexbuff
)
167 # Add path info to any syntax errors in the source files
169 parseSource(filecontents
, indexbuff
)
170 except SyntaxError, se
:
171 se
.filename
= fullpath
175 def parseSource(sourcecode
, indexbuff
):
176 """Parses python source code and puts the resulting index into the buffer.
178 # Parse the source to an Abstract Syntax Tree
179 ast
= parser
.suite(sourcecode
)
180 astlist
= parser
.ast2list(ast
, True)
182 # Set these globals before each file's AST is walked
183 global sourcelinehassymbol
184 sourcelinehassymbol
= False
185 global currentlinenum
188 # Walk the AST to index the rest of the file
189 walkAst(astlist
, indexbuff
)
192 def walkAst(astlist
, indexbuff
):
193 """Scan the AST for tokens, write out index lines.
200 global sourcelinehassymbol
203 global currentlinenum
205 # Remember the latest symbol
207 latestsymbol
= astlist
[0]
211 # Save the previous token and get the latest one
212 prevtoken
= latesttoken
213 latesttoken
= astlist
[0]
215 # If this code is on a new line number
216 if astlist
[2] != currentlinenum
:
217 currentlinenum
= astlist
[2]
219 # If there was a symbol of interest,
220 # remember this location in the index
221 if sourcelinehassymbol
:
222 latestnewline
= len(indexbuff
)
223 sourcelinehassymbol
= False
225 # If there was no symbol of interest between this and the previous
226 # newline, remove all entries added since the previous newline
228 del indexbuff
[latestnewline
:]
230 # Write the new line number
231 indexbuff
.append("\n\n%d " % astlist
[2])
233 # Clear an include mark when a newline token is reached
234 # This is what ends a comma-separated list of modules after import
235 if mark
== MARK_INCLUDE
:
238 if latesttoken
== TOK_NAME
:
239 # If a name is not a python keyword, it is a symbol of interest
240 if astlist
[1] not in kwlist
:
242 # Remember that there is a symbol of interest
243 sourcelinehassymbol
= True
245 # Write the mark and the symbol
246 indexbuff
.append("\n%s%s\n" % (mark
, astlist
[1]))
248 # Clear the mark unless it's an include mark
249 # This is what allows a comma-separated list of modules after import
250 if mark
!= MARK_INCLUDE
:
253 # If the name is a python keyword
255 # Some keywords determine what mark should prefix the next name
260 # Remember that we're in a function definition
268 # Write out the keyword
269 indexbuff
.append("%s " % kw
)
271 # This set of tokens and symbols indicates a function call (not perfect)
272 elif (latesttoken
== TOK_LPAR
) and (prevtoken
== TOK_NAME
) and (
273 (latestsymbol
== SYM_TRAILER
) or (latestsymbol
== SYM_VARARGSLIST
)):
275 # Insert a function-call mark before the previous name
276 indexbuff
[-1] = "\n%s%s( " % (MARK_FUNC_CALL
, indexbuff
[-1][1:])
278 # Count the number of indents; to be used by dedent
279 elif latesttoken
== TOK_INDENT
:
283 # When dedent reaches the level of the function def,
284 # write the function-end mark
285 elif latesttoken
== TOK_DEDENT
:
289 indexbuff
.insert(-1, "\n\n%d \n%s\n" % (astlist
[2], MARK_FUNC_END
))
293 # Replace the last line number placeholder with a newline
294 # when at the end of a file
295 elif latesttoken
== TOK_ENDMARKER
:
296 if len(indexbuff
) > 0:
299 # For uninteresting tokens, just write the accompanying string
301 if len(astlist
[1]) > 0:
302 nonsymboltext
= astlist
[1].replace("\n","\\n") + ' '
305 indexbuff
.append(nonsymboltext
)
307 # Recurse into all nodes
308 for i
in range(1, len(astlist
)):
309 if type(astlist
[i
]) == types
.ListType
:
310 walkAst(astlist
[i
], indexbuff
)
313 def writeIndex(basepath
, indexfn
, indexbuff
, fnamesbuff
):
314 """Write the index buffer to the output file.
316 fout
= open(os
.path
.join(basepath
, indexfn
), 'w')
318 # Write the header and index
319 index
= ''.join(indexbuff
)
320 index_len
= len(index
)
321 hdr_len
= len(basepath
) + 25
322 fout
.write("cscope 15 %s -c %010d" % (basepath
, hdr_len
+ index_len
))
326 fnames
= '\n'.join(fnamesbuff
) + '\n'
327 fout
.write("\n1\n.\n0\n")
328 fout
.write("%d\n" % len(fnamesbuff
))
329 fout
.write("%d\n" % len(fnames
))
334 if __name__
== "__main__":