Roll src/third_party/WebKit f007c95:0171005 (svn 185074:185088)
[chromium-blink-merge.git] / tools / code_coverage / croc_scan.py
blob8d0e2e8df2a17820aa1576305442cdc4b94e985e
1 # Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
5 """Crocodile source scanners."""
8 import re
class Scanner(object):
  """Generic source scanner.

  Finds the lines of a source file which may contain executable statements
  by removing comments and tracking string literals that span lines.

  Subclasses configure the scanner through these attributes:
    re_token: Compiled regular expression matching every token of interest
        (comment markers and string delimiters).
    comment_to_eol: List of tokens which start a comment running to the end
        of the line.
    comment_start: Token which opens a multi-line comment, or None.
    comment_end: Token which closes a multi-line comment, or None.
  """

  def __init__(self):
    """Constructor."""
    self.re_token = re.compile('#')
    self.comment_to_eol = ['#']
    self.comment_start = None
    self.comment_end = None

  def ScanLines(self, lines):
    """Scans the lines for executable statements.

    Args:
      lines: Iterator returning source lines.

    Returns:
      An array of line numbers which are executable.  Line numbers start
      at 1.
    """
    exe_lines = []

    # Token which ends the string / multi-line comment we are currently
    # inside, or None when we are not inside one.
    in_string = None
    in_comment = None
    # Column at which the current multi-line comment starts on this line.
    comment_index = None

    for lineno, line in enumerate(lines, 1):
      in_string_at_start = in_string

      for t in self.re_token.finditer(line):
        # Use the whole match so that patterns without a capture group (such
        # as the default one) work, and strip it so that patterns which
        # swallow leading whitespace (e.g. '^\s*#') still compare equal to
        # the bare entries in comment_to_eol.
        tokenstr = t.group(0).strip()

        if in_comment:
          # Inside a multi-line comment, so look for end token
          if tokenstr == in_comment:
            in_comment = None
            # Replace comment with spaces.  The length of the line is kept
            # unchanged, so offsets of later matches remain valid.
            line = (line[:comment_index]
                    + ' ' * (t.end(0) - comment_index)
                    + line[t.end(0):])

        elif in_string:
          # Inside a string, so look for end token
          if tokenstr == in_string:
            in_string = None

        elif tokenstr in self.comment_to_eol:
          # Single-line comment, so truncate line at start of token
          line = line[:t.start(0)]
          break

        elif tokenstr == self.comment_start:
          # Multi-line comment start - end token is comment_end
          in_comment = self.comment_end
          comment_index = t.start(0)

        else:
          # Starting a string - end token is same as start
          in_string = tokenstr

      # If still in comment at end of line, remove comment
      if in_comment:
        line = line[:comment_index]
        # Next line, delete from the beginning
        comment_index = 0

      # If line-sans-comments is not empty, claim it may be executable.  A
      # line which started inside a string is also claimed.
      if line.strip() or in_string_at_start:
        exe_lines.append(lineno)

    # Return executable lines
    return exe_lines

  def Scan(self, filename):
    """Reads the file and scans its lines.

    Args:
      filename: Path to file to scan.

    Returns:
      An array of line numbers which are executable.

    Raises:
      IOError: If the file cannot be opened or read.
    """
    # TODO: All manner of error checking
    # The context manager closes the file even if scanning raises.
    with open(filename, 'rt') as f:
      return self.ScanLines(f)
class PythonScanner(Scanner):
  """Scanner configured for Python source files."""

  def __init__(self):
    """Sets up the Python token pattern and comment markers."""
    super(PythonScanner, self).__init__()

    # Python only has '#' comments running to end of line; there is no
    # multi-line comment syntax to track.
    self.comment_to_eol = ['#']
    self.comment_start = self.comment_end = None

    # Tokens of interest: comment marker, triple-quoted string delimiters,
    # and single/double quotes which are not escaped by a backslash.
    #
    # TODO: This breaks for strings ending in more than 2 backslashes.  Need
    # a pattern which counts only an odd number of backslashes, so the last
    # one thus escapes the quote.
    self.re_token = re.compile(r'(#|\'\'\'|"""|(?<!(?<!\\)\\)["\'])')
class CppScanner(Scanner):
  """Scanner configured for C, C++, ObjC and ObjC++ source files."""

  def __init__(self):
    """Sets up the C-family token pattern and comment markers."""
    super(CppScanner, self).__init__()

    # Multi-line comments are delimited by /* ... */
    self.comment_start = '/*'
    self.comment_end = '*/'

    # A '#' at the beginning of a line is treated like a comment, so that
    # preprocessor definitions are ignored.
    self.comment_to_eol = ['//', '#']

    # TODO: Treat '\' at EOL as a token, and handle it as continuing the
    # previous line.  That is, if in a comment-to-eol, this line is a comment
    # too.

    # Tokens of interest: preprocessor '#' at start of line, both comment
    # styles, and single/double quotes which are not escaped by a backslash.
    #
    # TODO: This breaks for strings ending in more than 2 backslashes.  Need
    # a pattern which counts only an odd number of backslashes, so the last
    # one thus escapes the quote.
    self.re_token = re.compile(r'(^\s*#|//|/\*|\*/|(?<!(?<!\\)\\)["\'])')
def ScanFile(filename, language):
  """Scans a file for executable lines.

  Picks the scanner matching the language and runs it over the file.

  Args:
    filename: Path to file to scan.
    language: Language for file ('C', 'C++', 'python', 'ObjC', 'ObjC++')

  Returns:
    A list of executable lines, or an empty list if the file was not a handled
    language.
  """
  if language == 'python':
    scanner = PythonScanner()
    return scanner.Scan(filename)

  if language in ['C', 'C++', 'ObjC', 'ObjC++']:
    scanner = CppScanner()
    return scanner.Scan(filename)

  # Any other language is not handled, so nothing is reported as executable.
  return []