openfile(): Go back to opening the files in text mode. This undoes
[python/dscho.git] / Tools / scripts / reindent.py
blob13150f9eef1705287841daa18f09c990c5530cc6
1 #! /usr/bin/env python
3 # Released to the public domain, by Tim Peters, 03 October 2000.
5 """reindent [-d][-r][-v] [ path ... ]
7 -d Dry run. Analyze, but don't make any changes to, files.
8 -r Recurse. Search for all .py files in subdirectories too.
9 -v Verbose. Print informative msgs; else no output.
11 Change Python (.py) files to use 4-space indents and no hard tab characters.
12 Also trim excess spaces and tabs from ends of lines, and remove empty lines
13 at the end of files. Also ensure the last line ends with a newline.
15 If no paths are given on the command line, reindent operates as a filter,
16 reading a single source file from standard input and writing the transformed
17 source to standard output. In this case, the -d, -r and -v flags are
18 ignored.
20 You can pass one or more file and/or directory paths. When a path is a
21 directory, all .py files within that directory will be examined, and, if the
22 -r option is given, likewise recursively for subdirectories.
24 If output is not to standard output, reindent overwrites files in place,
25 renaming the originals with a .bak extension. If it finds nothing to
26 change, the file is left alone. If reindent does change a file, the changed
27 file is a fixed-point for future runs (i.e., running reindent on the
28 resulting .py file won't change it again).
30 The hard part of reindenting is figuring out what to do with comment
31 lines. So long as the input files get a clean bill of health from
32 tabnanny.py, reindent should do a good job.
33 """
35 __version__ = "1"
37 import tokenize
38 import os
39 import sys
# Command-line option counters.  main() increments them once per occurrence
# of -v, -r and -d respectively; check() only tests them for truth.
verbose = 0
recurse = 0
dryrun = 0
def errprint(*args):
    """Write the arguments to sys.stderr on a single line.

    Every argument is converted with str(); the pieces are separated by
    one space and the line is terminated with a newline.
    """
    sys.stderr.write(" ".join(map(str, args)))
    sys.stderr.write("\n")
52 def main():
53 import getopt
54 global verbose, recurse, dryrun
55 try:
56 opts, args = getopt.getopt(sys.argv[1:], "drv")
57 except getopt.error, msg:
58 errprint(msg)
59 return
60 for o, a in opts:
61 if o == '-d':
62 dryrun += 1
63 elif o == '-r':
64 recurse += 1
65 elif o == '-v':
66 verbose += 1
67 if not args:
68 r = Reindenter(sys.stdin)
69 r.run()
70 r.write(sys.stdout)
71 return
72 for arg in args:
73 check(arg)
75 def check(file):
76 if os.path.isdir(file) and not os.path.islink(file):
77 if verbose:
78 print "listing directory", file
79 names = os.listdir(file)
80 for name in names:
81 fullname = os.path.join(file, name)
82 if ((recurse and os.path.isdir(fullname) and
83 not os.path.islink(fullname))
84 or name.lower().endswith(".py")):
85 check(fullname)
86 return
88 if verbose:
89 print "checking", file, "...",
90 try:
91 f = open(file)
92 except IOError, msg:
93 errprint("%s: I/O Error: %s" % (file, str(msg)))
94 return
96 r = Reindenter(f)
97 f.close()
98 if r.run():
99 if verbose:
100 print "changed."
101 if dryrun:
102 print "But this is a dry run, so leaving it alone."
103 if not dryrun:
104 bak = file + ".bak"
105 if os.path.exists(bak):
106 os.remove(bak)
107 os.rename(file, bak)
108 if verbose:
109 print "renamed", file, "to", bak
110 f = open(file, "w")
111 r.write(f)
112 f.close()
113 if verbose:
114 print "wrote new", file
115 else:
116 if verbose:
117 print "unchanged."
def _rstrip(line, JUNK='\n \t'):
    """Return `line` with trailing characters found in JUNK removed.

    By default that means trailing spaces, tabs and newlines.  A bare
    line.rstrip() would strip other trailing characters as well; this
    helper deliberately leaves anything not listed in JUNK in place.
    """
    keep = len(line)
    while keep:
        if line[keep - 1] not in JUNK:
            break
        keep -= 1
    return line[:keep]
class Reindenter:
    """Compute a 4-space-indented version of one Python source file.

    The constructor reads every line from the file object it is given.
    run() tokenizes those lines, builds the transformed program in
    self.after and reports whether it differs from the raw input.
    write() emits self.after to a file object.
    """

    def __init__(self, f):
        """Read all lines from file object `f` and set up tokenizer state."""
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

    def run(self):
        """Build the reindented program in self.after.

        Returns a true value when the result differs from the raw lines
        that were read, i.e. when the file would change.
        """
        tokenize.tokenize(self.getline, self.tokeneater)
        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()
        # Sentinel.
        stats = self.stats
        stats.append((len(lines), 0))
        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        i = stats[0][0]
        after.extend(lines[1:i])
        for i in range(len(stats)-1):
            # Each stats entry owns the lines from its own line number up
            # to (not including) the next entry's line number.
            thisstmt, thislevel = stats[i]
            nextstmt = stats[i+1][0]
            have = getlspace(lines[thisstmt])
            want = thislevel * 4
            if want < 0:
                # A comment line.
                if have:
                    # An indented comment line.  If we saw the same
                    # indentation before, reuse what it most recently
                    # mapped to.
                    want = have2want.get(have, -1)
                    if want < 0:
                        # Then it probably belongs to the next real stmt.
                        for j in xrange(i+1, len(stats)-1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                if have == getlspace(lines[jline]):
                                    want = jlevel * 4
                                break
                    if want < 0:
                        # Maybe it's a hanging comment, in which case we
                        # should shift it like its base line got shifted.
                        # `after` is 0-based while `lines` is 1-based,
                        # hence the jline-1.
                        for j in xrange(i-1, -1, -1):
                            jline, jlevel = stats[j]
                            if jlevel >= 0:
                                want = have + getlspace(after[jline-1]) - \
                                       getlspace(lines[jline])
                                break
                    if want < 0:
                        # Still no luck -- leave it alone.
                        want = have
                else:
                    want = 0
            assert want >= 0
            have2want[have] = want
            diff = want - have
            if diff == 0 or have == 0:
                after.extend(lines[thisstmt:nextstmt])
            else:
                for line in lines[thisstmt:nextstmt]:
                    if diff > 0:
                        if line == "\n":
                            # Never add indentation to an all-blank line.
                            after.append(line)
                        else:
                            after.append(" " * diff + line)
                    else:
                        # Never remove more than the line's own leading
                        # spaces.
                        remove = min(getlspace(line), -diff)
                        after.append(line[remove:])
        return self.raw != self.after

    def write(self, f):
        """Write the transformed program (self.after) to file object `f`."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next cleaned-up line, or "" once all are consumed."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, start, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Token callback: track the indent level and record line stats.

        `start` is the (row, column) pair at which the token begins; only
        the row is used.  It is taken as an ordinary positional parameter
        and unpacked here rather than in the signature, because tuple
        parameters are not accepted by every Python version.  The token
        constants are bound as defaults so they are local lookups.
        """
        sline, _scol = start

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((sline, -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:  # not endmarker
                self.stats.append((sline, self.level))
def getlspace(line):
    """Return the number of space characters at the start of `line`.

    Only the space character counts; the first tab or any other
    character ends the count.
    """
    count = 0
    for ch in line:
        if ch != " ":
            break
        count += 1
    return count
# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()