- Got rid of newmodule.c
[python/dscho.git] / Lib / tabnanny.py
blob251df27567885476375e2912381dbf5740ff7192
1 #! /usr/bin/env python
3 """The Tab Nanny despises ambiguous indentation. She knows no mercy.
5 tabnanny -- Detection of ambiguous indentation
7 For the time being this module is intended to be called as a script.
8 However it is possible to import it into an IDE and use the function
9 check() described below.
11 Warning: The API provided by this module is likely to change in future
12 releases; such changes may not be backward compatible.
13 """
15 # Released to the public domain, by Tim Peters, 15 April 1998.
17 # XXX Note: this is now a standard library module.
18 # XXX The API needs to undergo changes however; the current code is too
19 # XXX script-like. This will be addressed later.
21 __version__ = "6"
23 import os
24 import sys
25 import getopt
26 import tokenize
27 if not hasattr(tokenize, 'NL'):
28 raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")
30 __all__ = ["check", "NannyNag", "process_tokens"]
32 verbose = 0
33 filename_only = 0
35 def errprint(*args):
36 sep = ""
37 for arg in args:
38 sys.stderr.write(sep + str(arg))
39 sep = " "
40 sys.stderr.write("\n")
42 def main():
43 global verbose, filename_only
44 try:
45 opts, args = getopt.getopt(sys.argv[1:], "qv")
46 except getopt.error, msg:
47 errprint(msg)
48 return
49 for o, a in opts:
50 if o == '-q':
51 filename_only = filename_only + 1
52 if o == '-v':
53 verbose = verbose + 1
54 if not args:
55 errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...")
56 return
57 for arg in args:
58 check(arg)
60 class NannyNag(Exception):
61 """
62 Raised by tokeneater() if detecting an ambiguous indent.
63 Captured and handled in check().
64 """
65 def __init__(self, lineno, msg, line):
66 self.lineno, self.msg, self.line = lineno, msg, line
67 def get_lineno(self):
68 return self.lineno
69 def get_msg(self):
70 return self.msg
71 def get_line(self):
72 return self.line
74 def check(file):
75 """check(file_or_dir)
77 If file_or_dir is a directory and not a symbolic link, then recursively
78 descend the directory tree named by file_or_dir, checking all .py files
79 along the way. If file_or_dir is an ordinary Python source file, it is
80 checked for whitespace related problems. The diagnostic messages are
81 written to standard output using the print statement.
82 """
84 if os.path.isdir(file) and not os.path.islink(file):
85 if verbose:
86 print "%s: listing directory" % `file`
87 names = os.listdir(file)
88 for name in names:
89 fullname = os.path.join(file, name)
90 if (os.path.isdir(fullname) and
91 not os.path.islink(fullname) or
92 os.path.normcase(name[-3:]) == ".py"):
93 check(fullname)
94 return
96 try:
97 f = open(file)
98 except IOError, msg:
99 errprint("%s: I/O Error: %s" % (`file`, str(msg)))
100 return
102 if verbose > 1:
103 print "checking", `file`, "..."
105 try:
106 process_tokens(tokenize.generate_tokens(f.readline))
108 except tokenize.TokenError, msg:
109 errprint("%s: Token Error: %s" % (`file`, str(msg)))
110 return
112 except NannyNag, nag:
113 badline = nag.get_lineno()
114 line = nag.get_line()
115 if verbose:
116 print "%s: *** Line %d: trouble in tab city! ***" % (
117 `file`, badline)
118 print "offending line:", `line`
119 print nag.get_msg()
120 else:
121 if ' ' in file: file = '"' + file + '"'
122 if filename_only: print file
123 else: print file, badline, `line`
124 return
126 if verbose:
127 print "%s: Clean bill of health." % `file`
129 class Whitespace:
130 # the characters used for space and tab
131 S, T = ' \t'
133 # members:
134 # raw
135 # the original string
137 # the number of leading whitespace characters in raw
138 # nt
139 # the number of tabs in raw[:n]
140 # norm
141 # the normal form as a pair (count, trailing), where:
142 # count
143 # a tuple such that raw[:n] contains count[i]
144 # instances of S * i + T
145 # trailing
146 # the number of trailing spaces in raw[:n]
147 # It's A Theorem that m.indent_level(t) ==
148 # n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
149 # is_simple
150 # true iff raw[:n] is of the form (T*)(S*)
152 def __init__(self, ws):
153 self.raw = ws
154 S, T = Whitespace.S, Whitespace.T
155 count = []
156 b = n = nt = 0
157 for ch in self.raw:
158 if ch == S:
159 n = n + 1
160 b = b + 1
161 elif ch == T:
162 n = n + 1
163 nt = nt + 1
164 if b >= len(count):
165 count = count + [0] * (b - len(count) + 1)
166 count[b] = count[b] + 1
167 b = 0
168 else:
169 break
170 self.n = n
171 self.nt = nt
172 self.norm = tuple(count), b
173 self.is_simple = len(count) <= 1
175 # return length of longest contiguous run of spaces (whether or not
176 # preceding a tab)
177 def longest_run_of_spaces(self):
178 count, trailing = self.norm
179 return max(len(count)-1, trailing)
181 def indent_level(self, tabsize):
182 # count, il = self.norm
183 # for i in range(len(count)):
184 # if count[i]:
185 # il = il + (i/tabsize + 1)*tabsize * count[i]
186 # return il
188 # quicker:
189 # il = trailing + sum (i/ts + 1)*ts*count[i] =
190 # trailing + ts * sum (i/ts + 1)*count[i] =
191 # trailing + ts * sum i/ts*count[i] + count[i] =
192 # trailing + ts * [(sum i/ts*count[i]) + (sum count[i])] =
193 # trailing + ts * [(sum i/ts*count[i]) + num_tabs]
194 # and note that i/ts*count[i] is 0 when i < ts
196 count, trailing = self.norm
197 il = 0
198 for i in range(tabsize, len(count)):
199 il = il + i/tabsize * count[i]
200 return trailing + tabsize * (il + self.nt)
202 # return true iff self.indent_level(t) == other.indent_level(t)
203 # for all t >= 1
204 def equal(self, other):
205 return self.norm == other.norm
207 # return a list of tuples (ts, i1, i2) such that
208 # i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
209 # Intended to be used after not self.equal(other) is known, in which
210 # case it will return at least one witnessing tab size.
211 def not_equal_witness(self, other):
212 n = max(self.longest_run_of_spaces(),
213 other.longest_run_of_spaces()) + 1
214 a = []
215 for ts in range(1, n+1):
216 if self.indent_level(ts) != other.indent_level(ts):
217 a.append( (ts,
218 self.indent_level(ts),
219 other.indent_level(ts)) )
220 return a
222 # Return True iff self.indent_level(t) < other.indent_level(t)
223 # for all t >= 1.
224 # The algorithm is due to Vincent Broman.
225 # Easy to prove it's correct.
226 # XXXpost that.
227 # Trivial to prove n is sharp (consider T vs ST).
228 # Unknown whether there's a faster general way. I suspected so at
229 # first, but no longer.
230 # For the special (but common!) case where M and N are both of the
231 # form (T*)(S*), M.less(N) iff M.len() < N.len() and
232 # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
233 # XXXwrite that up.
234 # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
235 def less(self, other):
236 if self.n >= other.n:
237 return False
238 if self.is_simple and other.is_simple:
239 return self.nt <= other.nt
240 n = max(self.longest_run_of_spaces(),
241 other.longest_run_of_spaces()) + 1
242 # the self.n >= other.n test already did it for ts=1
243 for ts in range(2, n+1):
244 if self.indent_level(ts) >= other.indent_level(ts):
245 return False
246 return True
248 # return a list of tuples (ts, i1, i2) such that
249 # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
250 # Intended to be used after not self.less(other) is known, in which
251 # case it will return at least one witnessing tab size.
252 def not_less_witness(self, other):
253 n = max(self.longest_run_of_spaces(),
254 other.longest_run_of_spaces()) + 1
255 a = []
256 for ts in range(1, n+1):
257 if self.indent_level(ts) >= other.indent_level(ts):
258 a.append( (ts,
259 self.indent_level(ts),
260 other.indent_level(ts)) )
261 return a
263 def format_witnesses(w):
264 firsts = map(lambda tup: str(tup[0]), w)
265 prefix = "at tab size"
266 if len(w) > 1:
267 prefix = prefix + "s"
268 return prefix + " " + ', '.join(firsts)
270 def process_tokens(tokens):
271 INDENT = tokenize.INDENT
272 DEDENT = tokenize.DEDENT
273 NEWLINE = tokenize.NEWLINE
274 JUNK = tokenize.COMMENT, tokenize.NL
275 indents = [Whitespace("")]
276 check_equal = 0
278 for (type, token, start, end, line) in tokens:
279 if type == NEWLINE:
280 # a program statement, or ENDMARKER, will eventually follow,
281 # after some (possibly empty) run of tokens of the form
282 # (NL | COMMENT)* (INDENT | DEDENT+)?
283 # If an INDENT appears, setting check_equal is wrong, and will
284 # be undone when we see the INDENT.
285 check_equal = 1
287 elif type == INDENT:
288 check_equal = 0
289 thisguy = Whitespace(token)
290 if not indents[-1].less(thisguy):
291 witness = indents[-1].not_less_witness(thisguy)
292 msg = "indent not greater e.g. " + format_witnesses(witness)
293 raise NannyNag(start[0], msg, line)
294 indents.append(thisguy)
296 elif type == DEDENT:
297 # there's nothing we need to check here! what's important is
298 # that when the run of DEDENTs ends, the indentation of the
299 # program statement (or ENDMARKER) that triggered the run is
300 # equal to what's left at the top of the indents stack
302 # Ouch! This assert triggers if the last line of the source
303 # is indented *and* lacks a newline -- then DEDENTs pop out
304 # of thin air.
305 # assert check_equal # else no earlier NEWLINE, or an earlier INDENT
306 check_equal = 1
308 del indents[-1]
310 elif check_equal and type not in JUNK:
311 # this is the first "real token" following a NEWLINE, so it
312 # must be the first token of the next program statement, or an
313 # ENDMARKER; the "line" argument exposes the leading whitespace
314 # for this statement; in the case of ENDMARKER, line is an empty
315 # string, so will properly match the empty string with which the
316 # "indents" stack was seeded
317 check_equal = 0
318 thisguy = Whitespace(line)
319 if not indents[-1].equal(thisguy):
320 witness = indents[-1].not_equal_witness(thisguy)
321 msg = "indent not equal e.g. " + format_witnesses(witness)
322 raise NannyNag(start[0], msg, line)
325 if __name__ == '__main__':
326 main()