3 """The Tab Nanny despises ambiguous indentation. She knows no mercy."""
5 # Released to the public domain, by Tim Peters, 15 April 1998.
7 # XXX Note: this is now a standard library module.
8 # XXX The API needs to undergo changes however; the current code is too
9 # XXX script-like. This will be addressed later.
26 sys
.stderr
.write(sep
+ str(arg
))
28 sys
.stderr
.write("\n")
31 global verbose
, filename_only
33 opts
, args
= getopt
.getopt(sys
.argv
[1:], "qv")
34 except getopt
.error
, msg
:
39 filename_only
= filename_only
+ 1
43 errprint("Usage:", sys
.argv
[0], "[-v] file_or_directory ...")
def __init__(self, lineno, msg, line):
    """Record where and why the nag was raised.

    lineno -- line number associated with the complaint
    msg    -- human-readable description of the problem
    line   -- text of the offending source line
    """
    self.lineno = lineno
    self.msg = msg
    self.line = line
59 if os
.path
.isdir(file) and not os
.path
.islink(file):
61 print "%s: listing directory" % `
file`
62 names
= os
.listdir(file)
64 fullname
= os
.path
.join(file, name
)
65 if (os
.path
.isdir(fullname
) and
66 not os
.path
.islink(fullname
) or
67 os
.path
.normcase(name
[-3:]) == ".py"):
74 errprint("%s: I/O Error: %s" % (`
file`
, str(msg
)))
78 print "checking", `
file`
, "..."
82 tokenize
.tokenize(f
.readline
, tokeneater
)
84 except tokenize
.TokenError
, msg
:
85 errprint("%s: Token Error: %s" % (`
file`
, str(msg
)))
89 badline
= nag
.get_lineno()
92 print "%s: *** Line %d: trouble in tab city! ***" % (
94 print "offending line:", `line`
97 if ' ' in file: file = '"' + file + '"'
98 if filename_only
: print file
99 else: print file, badline
, `line`
103 print "%s: Clean bill of health." % `
file`
106 # the characters used for space and tab
111 # the original string
113 # the number of leading whitespace characters in raw
115 # the number of tabs in raw[:n]
117 # the normal form as a pair (count, trailing), where:
119 # a tuple such that raw[:n] contains count[i]
120 # instances of S * i + T
122 # the number of trailing spaces in raw[:n]
123 # It's A Theorem that m.indent_level(t) ==
124 # n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
126 # true iff raw[:n] is of the form (T*)(S*)
128 def __init__(self
, ws
):
130 S
, T
= Whitespace
.S
, Whitespace
.T
141 count
= count
+ [0] * (b
- len(count
) + 1)
142 count
[b
] = count
[b
] + 1
148 self
.norm
= tuple(count
), b
149 self
.is_simple
= len(count
) <= 1
151 # return length of longest contiguous run of spaces (whether or not
def longest_run_of_spaces(self):
    """Return the length of the longest contiguous run of spaces.

    self.norm is the pair (count, trailing).  count[i] is the number of
    "i spaces followed by a tab" groups, so the longest run that precedes
    a tab is len(count) - 1; trailing is the run of spaces at the end.
    The result is the larger of the two.
    """
    tab_groups, tail_spaces = self.norm
    widest_before_tab = len(tab_groups) - 1
    if widest_before_tab >= tail_spaces:
        return widest_before_tab
    return tail_spaces
def indent_level(self, tabsize):
    """Return the indentation column of this whitespace for a tab size.

    tabsize -- the number of columns between tab stops (>= 1)

    Reads self.norm, the pair (count, trailing) where count[i] is the
    number of "i spaces followed by a tab" groups and trailing is the
    number of trailing spaces, and self.nt, the total number of tabs.
    """
    # The straightforward computation is
    #     il = trailing
    #     for i in range(len(count)):
    #         il = il + (i/tabsize + 1)*tabsize * count[i]
    # which can be rearranged (writing ts for tabsize):
    #     il = trailing + sum (i/ts + 1)*ts*count[i] =
    #     trailing + ts * sum (i/ts + 1)*count[i] =
    #     trailing + ts * sum i/ts*count[i] + count[i] =
    #     trailing + ts * [(sum i/ts*count[i]) + (sum count[i])] =
    #     trailing + ts * [(sum i/ts*count[i]) + num_tabs]
    # and note that i/ts*count[i] is 0 when i < ts, so the sum only
    # needs to start at i == ts.
    count, trailing = self.norm
    # The accumulator must start at zero before the loop reads it.
    il = 0
    for i in range(tabsize, len(count)):
        # Floor division keeps the arithmetic integral regardless of
        # whether "/" means true division in the running interpreter.
        il = il + i // tabsize * count[i]
    return trailing + tabsize * (il + self.nt)
178 # return true iff self.indent_level(t) == other.indent_level(t)
def equal(self, other):
    """Return true iff self and other indent identically at every tab size.

    Two whitespace prefixes give the same indent_level(t) for all
    t >= 1 exactly when their normal forms agree, so comparing the
    norm attributes is sufficient.
    """
    mine, theirs = self.norm, other.norm
    return mine == theirs
183 # return a list of tuples (ts, i1, i2) such that
184 # i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
185 # Intended to be used after not self.equal(other) is known, in which
186 # case it will return at least one witnessing tab size.
187 def not_equal_witness(self
, other
):
188 n
= max(self
.longest_run_of_spaces(),
189 other
.longest_run_of_spaces()) + 1
191 for ts
in range(1, n
+1):
192 if self
.indent_level(ts
) != other
.indent_level(ts
):
194 self
.indent_level(ts
),
195 other
.indent_level(ts
)) )
198 # Return true iff self.indent_level(t) < other.indent_level(t)
200 # The algorithm is due to Vincent Broman.
201 # Easy to prove it's correct.
203 # Trivial to prove n is sharp (consider T vs ST).
204 # Unknown whether there's a faster general way. I suspected so at
205 # first, but no longer.
206 # For the special (but common!) case where M and N are both of the
207 # form (T*)(S*), M.less(N) iff M.len() < N.len() and
208 # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
210 # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
211 def less(self
, other
):
212 if self
.n
>= other
.n
:
214 if self
.is_simple
and other
.is_simple
:
215 return self
.nt
<= other
.nt
216 n
= max(self
.longest_run_of_spaces(),
217 other
.longest_run_of_spaces()) + 1
218 # the self.n >= other.n test already did it for ts=1
219 for ts
in range(2, n
+1):
220 if self
.indent_level(ts
) >= other
.indent_level(ts
):
224 # return a list of tuples (ts, i1, i2) such that
225 # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
226 # Intended to be used after not self.less(other) is known, in which
227 # case it will return at least one witnessing tab size.
228 def not_less_witness(self
, other
):
229 n
= max(self
.longest_run_of_spaces(),
230 other
.longest_run_of_spaces()) + 1
232 for ts
in range(1, n
+1):
233 if self
.indent_level(ts
) >= other
.indent_level(ts
):
235 self
.indent_level(ts
),
236 other
.indent_level(ts
)) )
239 def format_witnesses(w
):
241 firsts
= map(lambda tup
: str(tup
[0]), w
)
242 prefix
= "at tab size"
244 prefix
= prefix
+ "s"
245 return prefix
+ " " + string
.join(firsts
, ', ')
247 # The collection of globals, the reset_globals() function, and the
248 # tokeneater() function, depend on which version of tokenize is
251 if hasattr(tokenize
, 'NL'):
252 # take advantage of Guido's patch!
258 global indents
, check_equal
260 indents
= [Whitespace("")]
262 def tokeneater(type, token
, start
, end
, line
,
263 INDENT
=tokenize
.INDENT
,
264 DEDENT
=tokenize
.DEDENT
,
265 NEWLINE
=tokenize
.NEWLINE
,
266 JUNK
=(tokenize
.COMMENT
, tokenize
.NL
) ):
267 global indents
, check_equal
270 # a program statement, or ENDMARKER, will eventually follow,
271 # after some (possibly empty) run of tokens of the form
272 # (NL | COMMENT)* (INDENT | DEDENT+)?
273 # If an INDENT appears, setting check_equal is wrong, and will
274 # be undone when we see the INDENT.
279 thisguy
= Whitespace(token
)
280 if not indents
[-1].less(thisguy
):
281 witness
= indents
[-1].not_less_witness(thisguy
)
282 msg
= "indent not greater e.g. " + format_witnesses(witness
)
283 raise NannyNag(start
[0], msg
, line
)
284 indents
.append(thisguy
)
287 # there's nothing we need to check here! what's important is
288 # that when the run of DEDENTs ends, the indentation of the
289 # program statement (or ENDMARKER) that triggered the run is
290 # equal to what's left at the top of the indents stack
292 # Ouch! This assert triggers if the last line of the source
293 # is indented *and* lacks a newline -- then DEDENTs pop out
295 # assert check_equal # else no earlier NEWLINE, or an earlier INDENT
300 elif check_equal
and type not in JUNK
:
301 # this is the first "real token" following a NEWLINE, so it
302 # must be the first token of the next program statement, or an
303 # ENDMARKER; the "line" argument exposes the leading whitespace
304 # for this statement; in the case of ENDMARKER, line is an empty
305 # string, so will properly match the empty string with which the
306 # "indents" stack was seeded
308 thisguy
= Whitespace(line
)
309 if not indents
[-1].equal(thisguy
):
310 witness
= indents
[-1].not_equal_witness(thisguy
)
311 msg
= "indent not equal e.g. " + format_witnesses(witness
)
312 raise NannyNag(start
[0], msg
, line
)
315 # unpatched version of tokenize
322 global nesting_level
, indents
, check_equal
323 nesting_level
= check_equal
= 0
324 indents
= [Whitespace("")]
326 def tokeneater(type, token
, start
, end
, line
,
327 INDENT
=tokenize
.INDENT
,
328 DEDENT
=tokenize
.DEDENT
,
329 NEWLINE
=tokenize
.NEWLINE
,
330 COMMENT
=tokenize
.COMMENT
,
332 global nesting_level
, indents
, check_equal
336 thisguy
= Whitespace(token
)
337 if not indents
[-1].less(thisguy
):
338 witness
= indents
[-1].not_less_witness(thisguy
)
339 msg
= "indent not greater e.g. " + format_witnesses(witness
)
340 raise NannyNag(start
[0], msg
, line
)
341 indents
.append(thisguy
)
346 elif type == NEWLINE
:
347 if nesting_level
== 0:
350 elif type == COMMENT
:
355 thisguy
= Whitespace(line
)
356 if not indents
[-1].equal(thisguy
):
357 witness
= indents
[-1].not_equal_witness(thisguy
)
358 msg
= "indent not equal e.g. " + format_witnesses(witness
)
359 raise NannyNag(start
[0], msg
, line
)
361 if type == OP
and token
in ('{', '[', '('):
362 nesting_level
= nesting_level
+ 1
364 elif type == OP
and token
in ('}', ']', ')'):
365 if nesting_level
== 0:
366 raise NannyNag(start
[0],
367 "unbalanced bracket '" + token
+ "'",
369 nesting_level
= nesting_level
- 1
371 if __name__
== '__main__':