3 # Released to the public domain, by Tim Peters, 03 October 2000.
5 """reindent [-d][-r][-v] [ path ... ]
7 -d (--dryrun) Dry run. Analyze, but don't make any changes to, files.
8 -r (--recurse) Recurse. Search for all .py files in subdirectories too.
9 -v (--verbose) Verbose. Print informative msgs; else no output.
10 -h (--help) Help. Print this usage information and exit.
12 Change Python (.py) files to use 4-space indents and no hard tab characters.
13 Also trim excess spaces and tabs from ends of lines, and remove empty lines
14 at the end of files. Also ensure the last line ends with a newline.
16 If no paths are given on the command line, reindent operates as a filter,
17 reading a single source file from standard input and writing the transformed
18 source to standard output. In this case, the -d, -r and -v flags are ignored.
21 You can pass one or more file and/or directory paths. When a path is a
22 directory, all .py files within that directory are examined and, if the -r
23 option is given, its subdirectories are searched recursively as well.
25 If output is not to standard output, reindent overwrites files in place,
26 renaming the originals with a .bak extension. If it finds nothing to
27 change, the file is left alone. If reindent does change a file, the changed
28 file is a fixed-point for future runs (i.e., running reindent on the
29 resulting .py file won't change it again).
31 The hard part of reindenting is figuring out what to do with comment
32 lines. So long as the input files get a clean bill of health from
33 tabnanny.py, reindent should do a good job.
48 print >> sys
.stderr
, msg
49 print >> sys
.stderr
, __doc__
54 sys
.stderr
.write(sep
+ str(arg
))
56 sys
.stderr
.write("\n")
60 global verbose
, recurse
, dryrun
62 opts
, args
= getopt
.getopt(sys
.argv
[1:], "drvh",
63 ["dryrun", "recurse", "verbose", "help"])
64 except getopt
.error
, msg
:
68 if o
in ('-d', '--dryrun'):
70 elif o
in ('-r', '--recurse'):
72 elif o
in ('-v', '--verbose'):
74 elif o
in ('-h', '--help'):
78 r
= Reindenter(sys
.stdin
)
86 if os
.path
.isdir(file) and not os
.path
.islink(file):
88 print "listing directory", file
89 names
= os
.listdir(file)
91 fullname
= os
.path
.join(file, name
)
92 if ((recurse
and os
.path
.isdir(fullname
) and
93 not os
.path
.islink(fullname
))
94 or name
.lower().endswith(".py")):
99 print "checking", file, "...",
103 errprint("%s: I/O Error: %s" % (file, str(msg
)))
112 print "But this is a dry run, so leaving it alone."
115 if os
.path
.exists(bak
):
119 print "renamed", file, "to", bak
124 print "wrote new", file
129 def _rstrip(line
, JUNK
='\n \t'):
130 """Return line stripped of trailing spaces, tabs, newlines.
132 Note that line.rstrip() instead also strips sundry control characters,
133 but at least one known Emacs user expects to keep junk like that, not
134 mentioning Barry by name or anything <wink>.
138 while i
> 0 and line
[i
-1] in JUNK
:
144 def __init__(self
, f
):
145 self
.find_stmt
= 1 # next token begins a fresh stmt?
146 self
.level
= 0 # current indent level
149 self
.raw
= f
.readlines()
151 # File lines, rstripped & tab-expanded. Dummy at start is so
152 # that we can use tokenize's 1-based line numbering easily.
153 # Note that a line is all-blank iff it's "\n".
154 self
.lines
= [_rstrip(line
).expandtabs() + "\n"
155 for line
in self
.raw
]
156 self
.lines
.insert(0, None)
157 self
.index
= 1 # index into self.lines of next line
159 # List of (lineno, indentlevel) pairs, one for each stmt and
160 # comment line. indentlevel is -1 for comment lines, as a
161 # signal that tokenize doesn't know what to do about them;
162 # indeed, they're our headache!
166 tokenize
.tokenize(self
.getline
, self
.tokeneater
)
167 # Remove trailing empty lines.
169 while lines
and lines
[-1] == "\n":
173 stats
.append((len(lines
), 0))
174 # Map count of leading spaces to # we want.
176 # Program after transformation.
177 after
= self
.after
= []
178 # Copy over initial empty lines -- there's nothing to do until
179 # we see a line with *something* on it.
181 after
.extend(lines
[1:i
])
182 for i
in range(len(stats
)-1):
183 thisstmt
, thislevel
= stats
[i
]
184 nextstmt
= stats
[i
+1][0]
185 have
= getlspace(lines
[thisstmt
])
190 # An indented comment line. If we saw the same
191 # indentation before, reuse what it was most recently mapped to.
193 want
= have2want
.get(have
, -1)
195 # Then it probably belongs to the next real stmt.
196 for j
in xrange(i
+1, len(stats
)-1):
197 jline
, jlevel
= stats
[j
]
199 if have
== getlspace(lines
[jline
]):
202 if want
< 0: # Maybe it's a hanging
203 # comment like this one,
204 # in which case we should shift it the same way its base line was shifted.
206 for j
in xrange(i
-1, -1, -1):
207 jline
, jlevel
= stats
[j
]
209 want
= have
+ getlspace(after
[jline
-1]) - \
210 getlspace(lines
[jline
])
213 # Still no luck -- leave it alone.
218 have2want
[have
] = want
220 if diff
== 0 or have
== 0:
221 after
.extend(lines
[thisstmt
:nextstmt
])
223 for line
in lines
[thisstmt
:nextstmt
]:
228 after
.append(" " * diff
+ line
)
230 remove
= min(getlspace(line
), -diff
)
231 after
.append(line
[remove
:])
232 return self
.raw
!= self
.after
235 f
.writelines(self
.after
)
237 # Line-getter for tokenize.
239 if self
.index
>= len(self
.lines
):
242 line
= self
.lines
[self
.index
]
246 # Line-eater for tokenize.
247 def tokeneater(self
, type, token
, (sline
, scol
), end
, line
,
248 INDENT
=tokenize
.INDENT
,
249 DEDENT
=tokenize
.DEDENT
,
250 NEWLINE
=tokenize
.NEWLINE
,
251 COMMENT
=tokenize
.COMMENT
,
255 # A program statement, or ENDMARKER, will eventually follow,
256 # after some (possibly empty) run of tokens of the form
257 # (NL | COMMENT)* (INDENT | DEDENT+)?
268 elif type == COMMENT
:
270 self
.stats
.append((sline
, -1))
271 # but we're still looking for a new stmt, so leave find_stmt alone
278 # This is the first "real token" following a NEWLINE, so it
279 # must be the first token of the next program statement, or an ENDMARKER.
282 if line
: # not endmarker
283 self
.stats
.append((sline
, self
.level
))
285 # Count number of leading blanks.
288 while i
< n
and line
[i
] == " ":
292 if __name__
== '__main__':