Tools/scripts/reindent.py

   1 #! /usr/bin/env python
   2
   3 # Released to the public domain, by Tim Peters, 03 October 2000.
   4
   5 """reindent [-d][-r][-v] path ...
   6
   7 -d  Dry run.  Analyze, but don't make any changes to, files.
   8 -r  Recurse.  Search for all .py files in subdirectories too.
   9 -v  Verbose.  Print informative msgs; else no output.
  10
  11 Change Python (.py) files to use 4-space indents and no hard tab characters.
  12 Also trim excess whitespace from ends of lines, and empty lines at the ends
  13 of files.  Ensure the last line ends with a newline.
  14
  15 Pass one or more file and/or directory paths.  When a directory path, all
  16 .py files within the directory will be examined, and, if the -r option is
  17 given, likewise recursively for subdirectories.
  18
  19 Overwrites files in place, renaming the originals with a .bak extension.
  20 If reindent finds nothing to change, the file is left alone.  If reindent
  21 does change a file, the changed file is a fixed-point for reindent (i.e.,
  22 running reindent on the resulting .py file won't change it again).
  23
  24 The hard part of reindenting is figuring out what to do with comment
  25 lines.  So long as the input files get a clean bill of health from
  26 tabnanny.py, reindent should do a good job.
  27 """
  28
# Version identifier string for this script.
__version__ = "1"
  30
  31 import tokenize
  32 import os
  33 import sys
  34
  35 verbose = 0
  36 recurse = 0
  37 dryrun  = 0
  38
  39 def errprint(*args):
  40     sep = ""
  41     for arg in args:
  42         sys.stderr.write(sep + str(arg))
  43         sep = " "
  44     sys.stderr.write("\n")
  45
  46 def main():
  47     import getopt
  48     global verbose, recurse, dryrun
  49     try:
  50         opts, args = getopt.getopt(sys.argv[1:], "drv")
  51     except getopt.error, msg:
  52         errprint(msg)
  53         return
  54     for o, a in opts:
  55         if o == '-d':
  56             dryrun += 1
  57         elif o == '-r':
  58             recurse += 1
  59         elif o == '-v':
  60             verbose += 1
  61     if not args:
  62         errprint("Usage:", __doc__)
  63         return
  64     for arg in args:
  65         check(arg)
  66
  67 def check(file):
  68     if os.path.isdir(file) and not os.path.islink(file):
  69         if verbose:
  70             print "listing directory", file
  71         names = os.listdir(file)
  72         for name in names:
  73             fullname = os.path.join(file, name)
  74             if ((recurse and os.path.isdir(fullname) and
  75                  not os.path.islink(fullname))
  76                 or name.lower().endswith(".py")):
  77                 check(fullname)
  78         return
  79
  80     if verbose:
  81         print "checking", file, "...",
  82     try:
  83         f = open(file)
  84     except IOError, msg:
  85         errprint("%s: I/O Error: %s" % (file, str(msg)))
  86         return
  87
  88     r = Reindenter(f)
  89     f.close()
  90     if r.run():
  91         if verbose:
  92             print "changed."
  93             if dryrun:
  94                 print "But this is a dry run, so leaving it alone."
  95         if not dryrun:
  96             bak = file + ".bak"
  97             if os.path.exists(bak):
  98                 os.remove(bak)
  99             os.rename(file, bak)
 100             if verbose:
 101                 print "renamed", file, "to", bak
 102             f = open(file, "w")
 103             r.write(f)
 104             f.close()
 105             if verbose:
 106                 print "wrote new", file
 107     else:
 108         if verbose:
 109             print "unchanged."
 110
 111 class Reindenter:
 112
 113     def __init__(self, f):
 114         self.find_stmt = 1  # next token begins a fresh stmt?
 115         self.level = 0      # current indent level
 116
 117         # Raw file lines.
 118         self.raw = f.readlines()
 119
 120         # File lines, rstripped & tab-expanded.  Dummy at start is so
 121         # that we can use tokenize's 1-based line numbering easily.
 122         # Note that a line is all-blank iff it's "\n".
 123         self.lines = [line.rstrip().expandtabs() + "\n"
 124                       for line in self.raw]
 125         self.lines.insert(0, None)
 126         self.index = 1  # index into self.lines of next line
 127
 128         # List of (lineno, indentlevel) pairs, one for each stmt and
 129         # comment line.  indentlevel is -1 for comment lines, as a
 130         # signal that tokenize doesn't know what to do about them;
 131         # indeed, they're our headache!
 132         self.stats = []
 133
 134     def run(self):
 135         tokenize.tokenize(self.getline, self.tokeneater)
 136         # Remove trailing empty lines.
 137         lines = self.lines
 138         while lines and lines[-1] == "\n":
 139             lines.pop()
 140         # Sentinel.
 141         stats = self.stats
 142         stats.append((len(lines), 0))
 143         # Map count of leading spaces to # we want.
 144         have2want = {}
 145         # Program after transformation.
 146         after = self.after = []
 147         for i in range(len(stats)-1):
 148             thisstmt, thislevel = stats[i]
 149             nextstmt = stats[i+1][0]
 150             have = getlspace(lines[thisstmt])
 151             want = thislevel * 4
 152             if want < 0:
 153                 # A comment line.
 154                 if have:
 155                     # An indented comment line.  If we saw the same
 156                     # indentation before, reuse what it most recently
 157                     # mapped to.
 158                     want = have2want.get(have, -1)
 159                     if want < 0:
 160                         # Then it probably belongs to the next real stmt.
 161                         for j in xrange(i+1, len(stats)-1):
 162                             jline, jlevel = stats[j]
 163                             if jlevel >= 0:
 164                                 if have == getlspace(lines[jline]):
 165                                     want = jlevel * 4
 166                                 break
 167                     if want < 0:           # Maybe it's a hanging
 168                                            # comment like this one,
 169                         # in which case we should shift it like its base
 170                         # line got shifted.
 171                         for j in xrange(i-1, -1, -1):
 172                             jline, jlevel = stats[j]
 173                             if jlevel >= 0:
 174                                 want = have + getlspace(after[jline-1]) - \
 175                                        getlspace(lines[jline])
 176                                 break
 177                     if want < 0:
 178                         # Still no luck -- leave it alone.
 179                         want = have
 180                 else:
 181                     want = 0
 182             assert want >= 0
 183             have2want[have] = want
 184             diff = want - have
 185             if diff == 0 or have == 0:
 186                 after.extend(lines[thisstmt:nextstmt])
 187             else:
 188                 for line in lines[thisstmt:nextstmt]:
 189                     if diff > 0:
 190                         if line == "\n":
 191                             after.append(line)
 192                         else:
 193                             after.append(" " * diff + line)
 194                     else:
 195                         remove = min(getlspace(line), -diff)
 196                         after.append(line[remove:])
 197         return self.raw != self.after
 198
 199     def write(self, f):
 200         f.writelines(self.after)
 201
 202     # Line-getter for tokenize.
 203     def getline(self):
 204         if self.index >= len(self.lines):
 205             line = ""
 206         else:
 207             line = self.lines[self.index]
 208             self.index += 1
 209         return line
 210
 211     # Line-eater for tokenize.
 212     def tokeneater(self, type, token, (sline, scol), end, line,
 213                    INDENT=tokenize.INDENT,
 214                    DEDENT=tokenize.DEDENT,
 215                    NEWLINE=tokenize.NEWLINE,
 216                    COMMENT=tokenize.COMMENT,
 217                    NL=tokenize.NL):
 218
 219         if type == NEWLINE:
 220             # A program statement, or ENDMARKER, will eventually follow,
 221             # after some (possibly empty) run of tokens of the form
 222             #     (NL | COMMENT)* (INDENT | DEDENT+)?
 223             self.find_stmt = 1
 224
 225         elif type == INDENT:
 226             self.find_stmt = 1
 227             self.level += 1
 228
 229         elif type == DEDENT:
 230             self.find_stmt = 1
 231             self.level -= 1
 232
 233         elif type == COMMENT:
 234             if self.find_stmt:
 235                 self.stats.append((sline, -1))
 236                 # but we're still looking for a new stmt, so leave
 237                 # find_stmt alone
 238
 239         elif type == NL:
 240             pass
 241
 242         elif self.find_stmt:
 243             # This is the first "real token" following a NEWLINE, so it
 244             # must be the first token of the next program statement, or an
 245             # ENDMARKER.
 246             self.find_stmt = 0
 247             if line:   # not endmarker
 248                 self.stats.append((sline, self.level))
 249
 250 # Count number of leading blanks.
 251 def getlspace(line):
 252     i, n = 0, len(line)
 253     while i < n and line[i] == " ":
 254         i += 1
 255     return i
 256
# Run the command-line driver only when executed as a script, not on import.
if __name__ == '__main__':
    main()