1 # Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
5 """Crocodile source scanners."""
class Scanner(object):
  """Generic source scanner.

  Finds the lines of a source file which may hold executable code, by
  removing comments and keeping track of strings which span lines.  The base
  class only understands '#' comments; a subclass adapts the scanner to a
  language by replacing the attributes set in the constructor.
  """

  def __init__(self):
    """Constructor."""
    # Pattern matching the tokens of interest (comment and string delimiters).
    # ScanLines() takes the token text from capture group 1, so the token is
    # wrapped in a group.
    self.re_token = re.compile('(#)')
    # Tokens which start a comment running to the end of the line.
    self.comment_to_eol = ['#']
    # Start and end tokens of multi-line comments; None if there are none.
    self.comment_start = None
    self.comment_end = None

  def ScanLines(self, lines):
    """Scans the lines for executable statements.

    Args:
      lines: Iterator returning source lines.

    Returns:
      An array of line numbers which are executable.  Line numbers start at 1.
      A line is reported if anything but whitespace is left once its comments
      are removed, or if the line starts inside a string.
    """
    exe_lines = []
    in_string = None      # End token of the string we are inside, if any
    in_comment = None     # End token of the comment we are inside, if any
    comment_index = None  # Column at which the open comment starts

    for lineno, line in enumerate(lines, 1):
      in_string_at_start = in_string

      # The matches refer to the line as it was before any editing.  Their
      # offsets stay valid because a finished comment is replaced by the same
      # number of spaces, and truncating the line ends the token loop.
      for t in self.re_token.finditer(line):
        # Token text is capture group 1; use the whole match if the pattern
        # has no groups at all.
        tokenstr = t.group(1) if t.re.groups else t.group(0)

        if in_comment:
          # Inside a multi-line comment, so look for end token
          if tokenstr == in_comment:
            in_comment = None
            # Replace comment with spaces
            line = (line[:comment_index]
                    + ' ' * (t.end(0) - comment_index)
                    + line[t.end(0):])

        elif in_string:
          # Inside a string, so look for end token
          if tokenstr == in_string:
            in_string = None

        elif tokenstr in self.comment_to_eol:
          # Single-line comment, so truncate line at start of token
          line = line[:t.start(0)]
          break

        elif tokenstr == self.comment_start:
          # Multi-line comment start - end token is comment_end
          in_comment = self.comment_end
          comment_index = t.start(0)

        else:
          # Starting a string - end token is same as start
          in_string = tokenstr

      # If still in comment at end of line, remove comment
      if in_comment:
        line = line[:comment_index]
        # Next line, delete from the beginning
        comment_index = 0

      # If line-sans-comments is not empty, claim it may be executable
      if line.strip() or in_string_at_start:
        exe_lines.append(lineno)

    # Return executable lines
    return exe_lines

  def Scan(self, filename):
    """Reads the file and scans its lines.

    Args:
      filename: Path to file to scan.

    Returns:
      An array of line numbers which are executable.
    """
    # TODO: All manner of error checking
    # The file is closed when the block exits, even if scanning raises.
    with open(filename, 'rt') as f:
      return self.ScanLines(f)
class PythonScanner(Scanner):
  """Python source scanner."""

  def __init__(self):
    """Constructor."""
    Scanner.__init__(self)

    # Tokens of interest: '#' comments, triple-quoted string delimiters and
    # single quote characters.  The token text itself is capture group 1.
    #
    # The non-capturing prefix makes sure a quote only counts as a delimiter
    # when an even number of backslashes (possibly none) precedes it; with an
    # odd number, the last backslash escapes the quote.  A lookbehind cannot
    # count an arbitrarily long run of backslashes, so the prefix consumes the
    # whole run in pairs, starting at its first backslash.  Tokens which do
    # not start with a quote are matched regardless of backslashes.
    quote_guard = r'(?:(?<!\\)(?:\\\\)*(?=["\'])|(?!["\']))'
    self.re_token = re.compile(quote_guard + r'(#|\'\'\'|"""|["\'])')
    self.comment_to_eol = ['#']
    self.comment_start = None
    self.comment_end = None
class CppScanner(Scanner):
  """C / C++ / ObjC / ObjC++ source scanner."""

  def __init__(self):
    """Constructor."""
    Scanner.__init__(self)

    # The token text is capture group 1.  The non-capturing prefix decides
    # where a token may be recognized:
    #  - '#' only counts at the beginning of a line, after optional
    #    whitespace.  The whitespace is kept out of the group so the token is
    #    exactly '#', as listed in comment_to_eol, however far the directive
    #    is indented.
    #  - A quote only counts when an even number of backslashes (possibly
    #    none) precedes it; with an odd number, the last backslash escapes
    #    the quote.  The run of backslashes is consumed in pairs, since a
    #    lookbehind cannot count a run of arbitrary length.
    #  - Any other token is matched wherever it appears.
    preprocessor_prefix = r'^\s*(?=#)'
    quote_prefix = r'(?<!\\)(?:\\\\)*(?=["\'])'
    other_prefix = r'(?!["\'#])'
    self.re_token = re.compile(
        '(?:' + preprocessor_prefix
        + '|' + quote_prefix
        + '|' + other_prefix + ')'
        + r'(#|//|/\*|\*/|["\'])')

    # TODO: Treat '\' at EOL as a token, and handle it as continuing the
    # previous line.  That is, if in a comment-to-eol, the next line is part
    # of the comment as well.

    # Note that we treat # at beginning of line as a comment, so that we ignore
    # preprocessor definitions
    self.comment_to_eol = ['//', '#']

    self.comment_start = '/*'
    self.comment_end = '*/'
def ScanFile(filename, language):
  """Scans a file for executable lines.

  Args:
    filename: Path to file to scan.
    language: Language for file ('C', 'C++', 'python', 'ObjC', 'ObjC++')

  Returns:
    A list of executable lines, or an empty list if the file was not a handled
    language.
  """
  if language == 'python':
    return PythonScanner().Scan(filename)
  if language in ('C', 'C++', 'ObjC', 'ObjC++'):
    return CppScanner().Scan(filename)

  # Something we don't handle.  The file is not opened in this case.
  return []