Tools/scripts/reindent.py

   1 #! /usr/bin/env python
   2
   3 # Released to the public domain, by Tim Peters, 03 October 2000.
   4
   5 """reindent [-d][-r][-v] [ path ... ]
   6
   7 -d (--dryrun)  Dry run.  Analyze, but don't make any changes to, files.
   8 -r (--recurse) Recurse.  Search for all .py files in subdirectories too.
   9 -v (--verbose) Verbose.  Print informative msgs; else no output.
  10 -h (--help)    Help.     Print this usage information and exit.
  11
  12 Change Python (.py) files to use 4-space indents and no hard tab characters.
  13 Also trim excess spaces and tabs from ends of lines, and remove empty lines
  14 at the end of files.  Also ensure the last line ends with a newline.
  15
  16 If no paths are given on the command line, reindent operates as a filter,
  17 reading a single source file from standard input and writing the transformed
  18 source to standard output.  In this case, the -d, -r and -v flags are
  19 ignored.
  20
  21 You can pass one or more file and/or directory paths.  When a directory
  22 path, all .py files within the directory will be examined, and, if the -r
  23 option is given, likewise recursively for subdirectories.
  24
  25 If output is not to standard output, reindent overwrites files in place,
  26 renaming the originals with a .bak extension.  If it finds nothing to
  27 change, the file is left alone.  If reindent does change a file, the changed
  28 file is a fixed-point for future runs (i.e., running reindent on the
  29 resulting .py file won't change it again).
  30
  31 The hard part of reindenting is figuring out what to do with comment
  32 lines.  So long as the input files get a clean bill of health from
  33 tabnanny.py, reindent should do a good job.
  34 """
  35
# Version of this script.
__version__ = "1"

import tokenize
import os
import sys

# Command-line flags.  main() increments one each time the matching
# option appears on the command line; check() reads them.
verbose = 0     # -v / --verbose: print progress messages to stdout
recurse = 0     # -r / --recurse: descend into subdirectories
dryrun  = 0     # -d / --dryrun:  analyze only, never rewrite a file
  45
  46 def usage(msg=None):
  47     if msg is not None:
  48         print >> sys.stderr, msg
  49     print >> sys.stderr, __doc__
  50
  51 def errprint(*args):
  52     sep = ""
  53     for arg in args:
  54         sys.stderr.write(sep + str(arg))
  55         sep = " "
  56     sys.stderr.write("\n")
  57
  58 def main():
  59     import getopt
  60     global verbose, recurse, dryrun
  61     try:
  62         opts, args = getopt.getopt(sys.argv[1:], "drvh",
  63                                    ["dryrun", "recurse", "verbose", "help"])
  64     except getopt.error, msg:
  65         usage(msg)
  66         return
  67     for o, a in opts:
  68         if o in ('-d', '--dryrun'):
  69             dryrun += 1
  70         elif o in ('-r', '--recurse'):
  71             recurse += 1
  72         elif o in ('-v', '--verbose'):
  73             verbose += 1
  74         elif o in ('-h', '--help'):
  75             usage()
  76             return
  77     if not args:
  78         r = Reindenter(sys.stdin)
  79         r.run()
  80         r.write(sys.stdout)
  81         return
  82     for arg in args:
  83         check(arg)
  84
  85 def check(file):
  86     if os.path.isdir(file) and not os.path.islink(file):
  87         if verbose:
  88             print "listing directory", file
  89         names = os.listdir(file)
  90         for name in names:
  91             fullname = os.path.join(file, name)
  92             if ((recurse and os.path.isdir(fullname) and
  93                  not os.path.islink(fullname))
  94                 or name.lower().endswith(".py")):
  95                 check(fullname)
  96         return
  97
  98     if verbose:
  99         print "checking", file, "...",
 100     try:
 101         f = open(file)
 102     except IOError, msg:
 103         errprint("%s: I/O Error: %s" % (file, str(msg)))
 104         return
 105
 106     r = Reindenter(f)
 107     f.close()
 108     if r.run():
 109         if verbose:
 110             print "changed."
 111             if dryrun:
 112                 print "But this is a dry run, so leaving it alone."
 113         if not dryrun:
 114             bak = file + ".bak"
 115             if os.path.exists(bak):
 116                 os.remove(bak)
 117             os.rename(file, bak)
 118             if verbose:
 119                 print "renamed", file, "to", bak
 120             f = open(file, "w")
 121             r.write(f)
 122             f.close()
 123             if verbose:
 124                 print "wrote new", file
 125     else:
 126         if verbose:
 127             print "unchanged."
 128
 129 def _rstrip(line, JUNK='\n \t'):
 130     """Return line stripped of trailing spaces, tabs, newlines.
 131
 132     Note that line.rstrip() instead also strips sundry control characters,
 133     but at least one known Emacs user expects to keep junk like that, not
 134     mentioning Barry by name or anything <wink>.
 135     """
 136
 137     i = len(line)
 138     while i > 0 and line[i-1] in JUNK:
 139         i -= 1
 140     return line[:i]
 141
 142 class Reindenter:
 143
 144     def __init__(self, f):
 145         self.find_stmt = 1  # next token begins a fresh stmt?
 146         self.level = 0      # current indent level
 147
 148         # Raw file lines.
 149         self.raw = f.readlines()
 150
 151         # File lines, rstripped & tab-expanded.  Dummy at start is so
 152         # that we can use tokenize's 1-based line numbering easily.
 153         # Note that a line is all-blank iff it's "\n".
 154         self.lines = [_rstrip(line).expandtabs() + "\n"
 155                       for line in self.raw]
 156         self.lines.insert(0, None)
 157         self.index = 1  # index into self.lines of next line
 158
 159         # List of (lineno, indentlevel) pairs, one for each stmt and
 160         # comment line.  indentlevel is -1 for comment lines, as a
 161         # signal that tokenize doesn't know what to do about them;
 162         # indeed, they're our headache!
 163         self.stats = []
 164
 165     def run(self):
 166         tokenize.tokenize(self.getline, self.tokeneater)
 167         # Remove trailing empty lines.
 168         lines = self.lines
 169         while lines and lines[-1] == "\n":
 170             lines.pop()
 171         # Sentinel.
 172         stats = self.stats
 173         stats.append((len(lines), 0))
 174         # Map count of leading spaces to # we want.
 175         have2want = {}
 176         # Program after transformation.
 177         after = self.after = []
 178         # Copy over initial empty lines -- there's nothing to do until
 179         # we see a line with *something* on it.
 180         i = stats[0][0]
 181         after.extend(lines[1:i])
 182         for i in range(len(stats)-1):
 183             thisstmt, thislevel = stats[i]
 184             nextstmt = stats[i+1][0]
 185             have = getlspace(lines[thisstmt])
 186             want = thislevel * 4
 187             if want < 0:
 188                 # A comment line.
 189                 if have:
 190                     # An indented comment line.  If we saw the same
 191                     # indentation before, reuse what it most recently
 192                     # mapped to.
 193                     want = have2want.get(have, -1)
 194                     if want < 0:
 195                         # Then it probably belongs to the next real stmt.
 196                         for j in xrange(i+1, len(stats)-1):
 197                             jline, jlevel = stats[j]
 198                             if jlevel >= 0:
 199                                 if have == getlspace(lines[jline]):
 200                                     want = jlevel * 4
 201                                 break
 202                     if want < 0:           # Maybe it's a hanging
 203                                            # comment like this one,
 204                         # in which case we should shift it like its base
 205                         # line got shifted.
 206                         for j in xrange(i-1, -1, -1):
 207                             jline, jlevel = stats[j]
 208                             if jlevel >= 0:
 209                                 want = have + getlspace(after[jline-1]) - \
 210                                        getlspace(lines[jline])
 211                                 break
 212                     if want < 0:
 213                         # Still no luck -- leave it alone.
 214                         want = have
 215                 else:
 216                     want = 0
 217             assert want >= 0
 218             have2want[have] = want
 219             diff = want - have
 220             if diff == 0 or have == 0:
 221                 after.extend(lines[thisstmt:nextstmt])
 222             else:
 223                 for line in lines[thisstmt:nextstmt]:
 224                     if diff > 0:
 225                         if line == "\n":
 226                             after.append(line)
 227                         else:
 228                             after.append(" " * diff + line)
 229                     else:
 230                         remove = min(getlspace(line), -diff)
 231                         after.append(line[remove:])
 232         return self.raw != self.after
 233
 234     def write(self, f):
 235         f.writelines(self.after)
 236
 237     # Line-getter for tokenize.
 238     def getline(self):
 239         if self.index >= len(self.lines):
 240             line = ""
 241         else:
 242             line = self.lines[self.index]
 243             self.index += 1
 244         return line
 245
 246     # Line-eater for tokenize.
 247     def tokeneater(self, type, token, (sline, scol), end, line,
 248                    INDENT=tokenize.INDENT,
 249                    DEDENT=tokenize.DEDENT,
 250                    NEWLINE=tokenize.NEWLINE,
 251                    COMMENT=tokenize.COMMENT,
 252                    NL=tokenize.NL):
 253
 254         if type == NEWLINE:
 255             # A program statement, or ENDMARKER, will eventually follow,
 256             # after some (possibly empty) run of tokens of the form
 257             #     (NL | COMMENT)* (INDENT | DEDENT+)?
 258             self.find_stmt = 1
 259
 260         elif type == INDENT:
 261             self.find_stmt = 1
 262             self.level += 1
 263
 264         elif type == DEDENT:
 265             self.find_stmt = 1
 266             self.level -= 1
 267
 268         elif type == COMMENT:
 269             if self.find_stmt:
 270                 self.stats.append((sline, -1))
 271                 # but we're still looking for a new stmt, so leave
 272                 # find_stmt alone
 273
 274         elif type == NL:
 275             pass
 276
 277         elif self.find_stmt:
 278             # This is the first "real token" following a NEWLINE, so it
 279             # must be the first token of the next program statement, or an
 280             # ENDMARKER.
 281             self.find_stmt = 0
 282             if line:   # not endmarker
 283                 self.stats.append((sline, self.level))
 284
 285 # Count number of leading blanks.
 286 def getlspace(line):
 287     i, n = 0, len(line)
 288     while i < n and line[i] == " ":
 289         i += 1
 290     return i
 291
# Run main() only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()