3 """The Tab Nanny despises ambiguous indentation. She knows no mercy.
5 tabnanny -- Detection of ambiguous indentation
7 For the time being this module is intended to be called as a script.
8 However it is possible to import it into an IDE and use the function
9 check() described below.
11 Warning: The API provided by this module is likely to change in future
12 releases; such changes may not be backward compatible.
15 # Released to the public domain, by Tim Peters, 15 April 1998.
17 # XXX Note: this is now a standard library module.
18 # XXX The API needs to undergo changes however; the current code is too
19 # XXX script-like. This will be addressed later.
27 if not hasattr(tokenize
, 'NL'):
28 raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")
30 __all__
= ["check", "NannyNag", "process_tokens"]
38 sys
.stderr
.write(sep
+ str(arg
))
40 sys
.stderr
.write("\n")
43 global verbose
, filename_only
45 opts
, args
= getopt
.getopt(sys
.argv
[1:], "qv")
46 except getopt
.error
, msg
:
51 filename_only
= filename_only
+ 1
55 errprint("Usage:", sys
.argv
[0], "[-v] file_or_directory ...")
60 class NannyNag(Exception):
62 Raised by tokeneater() if detecting an ambiguous indent.
63 Captured and handled in check().
65 def __init__(self
, lineno
, msg
, line
):
66 self
.lineno
, self
.msg
, self
.line
= lineno
, msg
, line
77 If file_or_dir is a directory and not a symbolic link, then recursively
78 descend the directory tree named by file_or_dir, checking all .py files
79 along the way. If file_or_dir is an ordinary Python source file, it is
80 checked for whitespace related problems. The diagnostic messages are
81 written to standard output using the print statement.
84 if os
.path
.isdir(file) and not os
.path
.islink(file):
86 print "%s: listing directory" % `
file`
87 names
= os
.listdir(file)
89 fullname
= os
.path
.join(file, name
)
90 if (os
.path
.isdir(fullname
) and
91 not os
.path
.islink(fullname
) or
92 os
.path
.normcase(name
[-3:]) == ".py"):
99 errprint("%s: I/O Error: %s" % (`
file`
, str(msg
)))
103 print "checking", `
file`
, "..."
106 process_tokens(tokenize
.generate_tokens(f
.readline
))
108 except tokenize
.TokenError
, msg
:
109 errprint("%s: Token Error: %s" % (`
file`
, str(msg
)))
112 except NannyNag
, nag
:
113 badline
= nag
.get_lineno()
114 line
= nag
.get_line()
116 print "%s: *** Line %d: trouble in tab city! ***" % (
118 print "offending line:", `line`
121 if ' ' in file: file = '"' + file + '"'
122 if filename_only
: print file
123 else: print file, badline
, `line`
127 print "%s: Clean bill of health." % `
file`
130 # the characters used for space and tab
135 # the original string
137 # the number of leading whitespace characters in raw
139 # the number of tabs in raw[:n]
141 # the normal form as a pair (count, trailing), where:
143 # a tuple such that raw[:n] contains count[i]
144 # instances of S * i + T
146 # the number of trailing spaces in raw[:n]
147 # It's A Theorem that m.indent_level(t) ==
148 # n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
150 # true iff raw[:n] is of the form (T*)(S*)
152 def __init__(self
, ws
):
154 S
, T
= Whitespace
.S
, Whitespace
.T
165 count
= count
+ [0] * (b
- len(count
) + 1)
166 count
[b
] = count
[b
] + 1
172 self
.norm
= tuple(count
), b
173 self
.is_simple
= len(count
) <= 1
175 # return length of longest contiguous run of spaces (whether or not
177 def longest_run_of_spaces(self
):
178 count
, trailing
= self
.norm
179 return max(len(count
)-1, trailing
)
181 def indent_level(self
, tabsize
):
182 # count, il = self.norm
183 # for i in range(len(count)):
185 # il = il + (i/tabsize + 1)*tabsize * count[i]
189 # il = trailing + sum (i/ts + 1)*ts*count[i] =
190 # trailing + ts * sum (i/ts + 1)*count[i] =
191 # trailing + ts * sum i/ts*count[i] + count[i] =
192 # trailing + ts * [(sum i/ts*count[i]) + (sum count[i])] =
193 # trailing + ts * [(sum i/ts*count[i]) + num_tabs]
194 # and note that i/ts*count[i] is 0 when i < ts
196 count
, trailing
= self
.norm
198 for i
in range(tabsize
, len(count
)):
199 il
= il
+ i
/tabsize
* count
[i
]
200 return trailing
+ tabsize
* (il
+ self
.nt
)
202 # return true iff self.indent_level(t) == other.indent_level(t)
204 def equal(self
, other
):
205 return self
.norm
== other
.norm
207 # return a list of tuples (ts, i1, i2) such that
208 # i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
209 # Intended to be used after not self.equal(other) is known, in which
210 # case it will return at least one witnessing tab size.
211 def not_equal_witness(self
, other
):
212 n
= max(self
.longest_run_of_spaces(),
213 other
.longest_run_of_spaces()) + 1
215 for ts
in range(1, n
+1):
216 if self
.indent_level(ts
) != other
.indent_level(ts
):
218 self
.indent_level(ts
),
219 other
.indent_level(ts
)) )
222 # Return True iff self.indent_level(t) < other.indent_level(t)
224 # The algorithm is due to Vincent Broman.
225 # Easy to prove it's correct.
227 # Trivial to prove n is sharp (consider T vs ST).
228 # Unknown whether there's a faster general way. I suspected so at
229 # first, but no longer.
230 # For the special (but common!) case where M and N are both of the
231 # form (T*)(S*), M.less(N) iff M.len() < N.len() and
232 # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
234 # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
235 def less(self
, other
):
236 if self
.n
>= other
.n
:
238 if self
.is_simple
and other
.is_simple
:
239 return self
.nt
<= other
.nt
240 n
= max(self
.longest_run_of_spaces(),
241 other
.longest_run_of_spaces()) + 1
242 # the self.n >= other.n test already did it for ts=1
243 for ts
in range(2, n
+1):
244 if self
.indent_level(ts
) >= other
.indent_level(ts
):
248 # return a list of tuples (ts, i1, i2) such that
249 # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
250 # Intended to be used after not self.less(other) is known, in which
251 # case it will return at least one witnessing tab size.
252 def not_less_witness(self
, other
):
253 n
= max(self
.longest_run_of_spaces(),
254 other
.longest_run_of_spaces()) + 1
256 for ts
in range(1, n
+1):
257 if self
.indent_level(ts
) >= other
.indent_level(ts
):
259 self
.indent_level(ts
),
260 other
.indent_level(ts
)) )
263 def format_witnesses(w
):
264 firsts
= map(lambda tup
: str(tup
[0]), w
)
265 prefix
= "at tab size"
267 prefix
= prefix
+ "s"
268 return prefix
+ " " + ', '.join(firsts
)
270 def process_tokens(tokens
):
271 INDENT
= tokenize
.INDENT
272 DEDENT
= tokenize
.DEDENT
273 NEWLINE
= tokenize
.NEWLINE
274 JUNK
= tokenize
.COMMENT
, tokenize
.NL
275 indents
= [Whitespace("")]
278 for (type, token
, start
, end
, line
) in tokens
:
280 # a program statement, or ENDMARKER, will eventually follow,
281 # after some (possibly empty) run of tokens of the form
282 # (NL | COMMENT)* (INDENT | DEDENT+)?
283 # If an INDENT appears, setting check_equal is wrong, and will
284 # be undone when we see the INDENT.
289 thisguy
= Whitespace(token
)
290 if not indents
[-1].less(thisguy
):
291 witness
= indents
[-1].not_less_witness(thisguy
)
292 msg
= "indent not greater e.g. " + format_witnesses(witness
)
293 raise NannyNag(start
[0], msg
, line
)
294 indents
.append(thisguy
)
297 # there's nothing we need to check here! what's important is
298 # that when the run of DEDENTs ends, the indentation of the
299 # program statement (or ENDMARKER) that triggered the run is
300 # equal to what's left at the top of the indents stack
302 # Ouch! This assert triggers if the last line of the source
303 # is indented *and* lacks a newline -- then DEDENTs pop out
305 # assert check_equal # else no earlier NEWLINE, or an earlier INDENT
310 elif check_equal
and type not in JUNK
:
311 # this is the first "real token" following a NEWLINE, so it
312 # must be the first token of the next program statement, or an
313 # ENDMARKER; the "line" argument exposes the leading whitespace
314 # for this statement; in the case of ENDMARKER, line is an empty
315 # string, so will properly match the empty string with which the
316 # "indents" stack was seeded
318 thisguy
= Whitespace(line
)
319 if not indents
[-1].equal(thisguy
):
320 witness
= indents
[-1].not_equal_witness(thisguy
)
321 msg
= "indent not equal e.g. " + format_witnesses(witness
)
322 raise NannyNag(start
[0], msg
, line
)
325 if __name__
== '__main__':