Tools/scripts/reindent.py

   1 #! /usr/bin/env python
   2
   3 # Released to the public domain, by Tim Peters, 03 October 2000.
   4
   5 """reindent [-d][-r][-v] [ path ... ]
   6
   7 -d  Dry run.  Analyze, but don't make any changes to, files.
   8 -r  Recurse.  Search for all .py files in subdirectories too.
   9 -v  Verbose.  Print informative msgs; else no output.
  10
  11 Change Python (.py) files to use 4-space indents and no hard tab characters.
  12 Also trim excess spaces and tabs from ends of lines, and remove empty lines
  13 at the end of files.  Also ensure the last line ends with a newline.
  14
  15 If no paths are given on the command line, reindent operates as a filter,
  16 reading a single source file from standard input and writing the transformed
  17 source to standard output.  In this case, the -d, -r and -v flags are
  18 ignored.
  19
  20 You can pass one or more file and/or directory paths.  When a directory
  21 path, all .py files within the directory will be examined, and, if the -r
  22 option is given, likewise recursively for subdirectories.
  23
  24 If output is not to standard output, reindent overwrites files in place,
  25 renaming the originals with a .bak extension.  If it finds nothing to
  26 change, the file is left alone.  If reindent does change a file, the changed
  27 file is a fixed-point for future runs (i.e., running reindent on the
  28 resulting .py file won't change it again).
  29
  30 The hard part of reindenting is figuring out what to do with comment
  31 lines.  So long as the input files get a clean bill of health from
  32 tabnanny.py, reindent should do a good job.
  33 """
  34
  35 __version__ = "1"
  36
  37 import tokenize
  38 import os
  39 import sys
  40
# Command-line option counters.  main() increments each one per occurrence
# of its flag; check() only tests them for truth.
verbose = 0  # -v: print progress messages while checking files
recurse = 0  # -r: descend into subdirectories of directory arguments
dryrun  = 0  # -d: analyze files but never rename or rewrite them
  44
  45 def errprint(*args):
  46     sep = ""
  47     for arg in args:
  48         sys.stderr.write(sep + str(arg))
  49         sep = " "
  50     sys.stderr.write("\n")
  51
  52 def main():
  53     import getopt
  54     global verbose, recurse, dryrun
  55     try:
  56         opts, args = getopt.getopt(sys.argv[1:], "drv")
  57     except getopt.error, msg:
  58         errprint(msg)
  59         return
  60     for o, a in opts:
  61         if o == '-d':
  62             dryrun += 1
  63         elif o == '-r':
  64             recurse += 1
  65         elif o == '-v':
  66             verbose += 1
  67     if not args:
  68         r = Reindenter(sys.stdin)
  69         r.run()
  70         r.write(sys.stdout)
  71         return
  72     for arg in args:
  73         check(arg)
  74
  75 def check(file):
  76     if os.path.isdir(file) and not os.path.islink(file):
  77         if verbose:
  78             print "listing directory", file
  79         names = os.listdir(file)
  80         for name in names:
  81             fullname = os.path.join(file, name)
  82             if ((recurse and os.path.isdir(fullname) and
  83                  not os.path.islink(fullname))
  84                 or name.lower().endswith(".py")):
  85                 check(fullname)
  86         return
  87
  88     if verbose:
  89         print "checking", file, "...",
  90     try:
  91         f = open(file)
  92     except IOError, msg:
  93         errprint("%s: I/O Error: %s" % (file, str(msg)))
  94         return
  95
  96     r = Reindenter(f)
  97     f.close()
  98     if r.run():
  99         if verbose:
 100             print "changed."
 101             if dryrun:
 102                 print "But this is a dry run, so leaving it alone."
 103         if not dryrun:
 104             bak = file + ".bak"
 105             if os.path.exists(bak):
 106                 os.remove(bak)
 107             os.rename(file, bak)
 108             if verbose:
 109                 print "renamed", file, "to", bak
 110             f = open(file, "w")
 111             r.write(f)
 112             f.close()
 113             if verbose:
 114                 print "wrote new", file
 115     else:
 116         if verbose:
 117             print "unchanged."
 118
 119 def _rstrip(line, JUNK='\n \t'):
 120     """Return line stripped of trailing spaces, tabs, newlines.
 121
 122     Note that line.rstrip() instead also strips sundry control characters,
 123     but at least one known Emacs user expects to keep junk like that, not
 124     mentioning Barry by name or anything <wink>.
 125     """
 126
 127     i = len(line)
 128     while i > 0 and line[i-1] in JUNK:
 129         i -= 1
 130     return line[:i]
 131
 132 class Reindenter:
 133
 134     def __init__(self, f):
 135         self.find_stmt = 1  # next token begins a fresh stmt?
 136         self.level = 0      # current indent level
 137
 138         # Raw file lines.
 139         self.raw = f.readlines()
 140
 141         # File lines, rstripped & tab-expanded.  Dummy at start is so
 142         # that we can use tokenize's 1-based line numbering easily.
 143         # Note that a line is all-blank iff it's "\n".
 144         self.lines = [_rstrip(line).expandtabs() + "\n"
 145                       for line in self.raw]
 146         self.lines.insert(0, None)
 147         self.index = 1  # index into self.lines of next line
 148
 149         # List of (lineno, indentlevel) pairs, one for each stmt and
 150         # comment line.  indentlevel is -1 for comment lines, as a
 151         # signal that tokenize doesn't know what to do about them;
 152         # indeed, they're our headache!
 153         self.stats = []
 154
 155     def run(self):
 156         tokenize.tokenize(self.getline, self.tokeneater)
 157         # Remove trailing empty lines.
 158         lines = self.lines
 159         while lines and lines[-1] == "\n":
 160             lines.pop()
 161         # Sentinel.
 162         stats = self.stats
 163         stats.append((len(lines), 0))
 164         # Map count of leading spaces to # we want.
 165         have2want = {}
 166         # Program after transformation.
 167         after = self.after = []
 168         # Copy over initial empty lines -- there's nothing to do until
 169         # we see a line with *something* on it.
 170         i = stats[0][0]
 171         after.extend(lines[1:i])
 172         for i in range(len(stats)-1):
 173             thisstmt, thislevel = stats[i]
 174             nextstmt = stats[i+1][0]
 175             have = getlspace(lines[thisstmt])
 176             want = thislevel * 4
 177             if want < 0:
 178                 # A comment line.
 179                 if have:
 180                     # An indented comment line.  If we saw the same
 181                     # indentation before, reuse what it most recently
 182                     # mapped to.
 183                     want = have2want.get(have, -1)
 184                     if want < 0:
 185                         # Then it probably belongs to the next real stmt.
 186                         for j in xrange(i+1, len(stats)-1):
 187                             jline, jlevel = stats[j]
 188                             if jlevel >= 0:
 189                                 if have == getlspace(lines[jline]):
 190                                     want = jlevel * 4
 191                                 break
 192                     if want < 0:           # Maybe it's a hanging
 193                                            # comment like this one,
 194                         # in which case we should shift it like its base
 195                         # line got shifted.
 196                         for j in xrange(i-1, -1, -1):
 197                             jline, jlevel = stats[j]
 198                             if jlevel >= 0:
 199                                 want = have + getlspace(after[jline-1]) - \
 200                                        getlspace(lines[jline])
 201                                 break
 202                     if want < 0:
 203                         # Still no luck -- leave it alone.
 204                         want = have
 205                 else:
 206                     want = 0
 207             assert want >= 0
 208             have2want[have] = want
 209             diff = want - have
 210             if diff == 0 or have == 0:
 211                 after.extend(lines[thisstmt:nextstmt])
 212             else:
 213                 for line in lines[thisstmt:nextstmt]:
 214                     if diff > 0:
 215                         if line == "\n":
 216                             after.append(line)
 217                         else:
 218                             after.append(" " * diff + line)
 219                     else:
 220                         remove = min(getlspace(line), -diff)
 221                         after.append(line[remove:])
 222         return self.raw != self.after
 223
 224     def write(self, f):
 225         f.writelines(self.after)
 226
 227     # Line-getter for tokenize.
 228     def getline(self):
 229         if self.index >= len(self.lines):
 230             line = ""
 231         else:
 232             line = self.lines[self.index]
 233             self.index += 1
 234         return line
 235
 236     # Line-eater for tokenize.
 237     def tokeneater(self, type, token, (sline, scol), end, line,
 238                    INDENT=tokenize.INDENT,
 239                    DEDENT=tokenize.DEDENT,
 240                    NEWLINE=tokenize.NEWLINE,
 241                    COMMENT=tokenize.COMMENT,
 242                    NL=tokenize.NL):
 243
 244         if type == NEWLINE:
 245             # A program statement, or ENDMARKER, will eventually follow,
 246             # after some (possibly empty) run of tokens of the form
 247             #     (NL | COMMENT)* (INDENT | DEDENT+)?
 248             self.find_stmt = 1
 249
 250         elif type == INDENT:
 251             self.find_stmt = 1
 252             self.level += 1
 253
 254         elif type == DEDENT:
 255             self.find_stmt = 1
 256             self.level -= 1
 257
 258         elif type == COMMENT:
 259             if self.find_stmt:
 260                 self.stats.append((sline, -1))
 261                 # but we're still looking for a new stmt, so leave
 262                 # find_stmt alone
 263
 264         elif type == NL:
 265             pass
 266
 267         elif self.find_stmt:
 268             # This is the first "real token" following a NEWLINE, so it
 269             # must be the first token of the next program statement, or an
 270             # ENDMARKER.
 271             self.find_stmt = 0
 272             if line:   # not endmarker
 273                 self.stats.append((sline, self.level))
 274
 275 # Count number of leading blanks.
 276 def getlspace(line):
 277     i, n = 0, len(line)
 278     while i < n and line[i] == " ":
 279         i += 1
 280     return i
 281
# Run the command-line interface only when executed as a script; importing
# the module just defines the functions and the Reindenter class.
if __name__ == '__main__':
    main()