Tools/scripts/pindent.py

   1 #! /usr/bin/env python
   2
   3 # This file contains a class and a main program that perform two
   4 # related (though complementary) formatting operations on Python
   5 # programs.  When called as "pindent -c", it takes a valid Python
   6 # program as input and outputs a version augmented with block-closing
   7 # comments.  When called as "pindent -r" it assumes its input is a
   8 # Python program with block-closing comments but with its indentation
   9 # messed up, and outputs a properly indented version.
  10
  11 # A "block-closing comment" is a comment of the form '# end <keyword>'
  12 # where <keyword> is the keyword that opened the block.  If the
  13 # opening keyword is 'def' or 'class', the function or class name may
  14 # be repeated in the block-closing comment as well.  Here is an
  15 # example of a program fully augmented with block-closing comments:
  16
  17 # def foobar(a, b):
  18 #    if a == b:
  19 #        a = a+1
  20 #    elif a < b:
  21 #        b = b-1
  22 #        if b > a: a = a-1
  23 #        # end if
  24 #    else:
  25 #        print 'oops!'
  26 #    # end if
  27 # # end def foobar
  28
  29 # Note that only the last part of an if...elif...else... block needs a
  30 # block-closing comment; the same is true for other compound
  31 # statements (e.g. try...except).  Also note that "short-form" blocks
  32 # like the second 'if' in the example must be closed as well;
  33 # otherwise the 'else' in the example would be ambiguous (remember
  34 # that indentation is not significant when interpreting block-closing
  35 # comments).
  36
  37 # Both operations are idempotent (i.e. applied to their own output
  38 # they yield an identical result).  Running first "pindent -c" and
  39 # then "pindent -r" on a valid Python program produces a program that
  40 # is semantically identical to the input (though its indentation may
  41 # be different).
  42
  43 # Other options:
  44 # -s stepsize: set the indentation step size (default 8)
  45 # -t tabsize : set the number of spaces a tab character is worth (default 8)
  46 # file ...   : file(s) to rewrite in place, keeping a backup in file~
  47 # With no file arguments (or a single "-"), reads stdin and writes stdout
  48
  49 # Caveats:
  50 # - comments ending in a backslash will be mistaken for continued lines
  51 # - continuations using backslash are always left unchanged
  52 # - continuations inside parentheses are not extra indented by -r
  53 #   but must be indented for -c to work correctly (this breaks
  54 #   idempotency!)
  55 # - continued lines inside triple-quoted strings are totally garbled
  56
  57 # Secret feature:
  58 # - On input, a block may also be closed with an "end statement" --
  59 #   this is a block-closing comment without the '#' sign.
  60
  61 # Possible improvements:
  62 # - check syntax based on transitions in 'next' table
  63 # - better error reporting
  64 # - better error recovery
  65 # - check identifier after class/def
  66
  67 # The following wishes need a more complete tokenization of the source:
  68 # - Don't get fooled by comments ending in backslash
  69 # - reindent continuation lines indicated by backslash
  70 # - handle continuation lines inside parentheses/braces/brackets
  71 # - handle triple quoted strings spanning lines
  72 # - realign comments
  73 # - optionally do much more thorough reformatting, a la C indent
  74
# Defaults
# Columns of indentation per nesting level (the -s option overrides it).
STEPSIZE = 8
# Columns a single tab character is worth (the -t option overrides it).
TABSIZE = 8
  78
  79 import os
  80 import re
  81 import string
  82 import sys
  83
  84 next = {}
  85 next['if'] = next['elif'] = 'elif', 'else', 'end'
  86 next['while'] = next['for'] = 'else', 'end'
  87 next['try'] = 'except', 'finally'
  88 next['except'] = 'except', 'else', 'end'
  89 next['else'] = next['finally'] = next['def'] = next['class'] = 'end'
  90 next['end'] = ()
  91 start = 'if', 'while', 'for', 'try', 'def', 'class'
  92
  93 class PythonIndenter:
  94
  95         def __init__(self, fpi = sys.stdin, fpo = sys.stdout,
  96                      indentsize = STEPSIZE, tabsize = TABSIZE):
  97                 self.fpi = fpi
  98                 self.fpo = fpo
  99                 self.indentsize = indentsize
 100                 self.tabsize = tabsize
 101                 self.lineno = 0
 102                 self.write = fpo.write
 103                 self.kwprog = re.compile(
 104                         r'^\s*(?P<kw>[a-z]+)'
 105                         r'(\s+(?P<id>[a-zA-Z_]\w*))?'
 106                         r'[^\w]')
 107                 self.endprog = re.compile(
 108                         r'^\s*#?\s*end\s+(?P<kw>[a-z]+)'
 109                         r'(\s+(?P<id>[a-zA-Z_]\w*))?'
 110                         r'[^\w]')
 111                 self.wsprog = re.compile(r'^[ \t]*')
 112         # end def __init__
 113
 114         def readline(self):
 115                 line = self.fpi.readline()
 116                 if line: self.lineno = self.lineno + 1
 117                 # end if
 118                 return line
 119         # end def readline
 120
 121         def error(self, fmt, *args):
 122                 if args: fmt = fmt % args
 123                 # end if
 124                 sys.stderr.write('Error at line %d: %s\n' % (self.lineno, fmt))
 125                 self.write('### %s ###\n' % fmt)
 126         # end def error
 127
 128         def getline(self):
 129                 line = self.readline()
 130                 while line[-2:] == '\\\n':
 131                         line2 = self.readline()
 132                         if not line2: break
 133                         # end if
 134                         line = line + line2
 135                 # end while
 136                 return line
 137         # end def getline
 138
 139         def putline(self, line, indent = None):
 140                 if indent is None:
 141                         self.write(line)
 142                         return
 143                 # end if
 144                 tabs, spaces = divmod(indent*self.indentsize, self.tabsize)
 145                 i = 0
 146                 m = self.wsprog.match(line)
 147                 if m: i = m.end()
 148                 # end if
 149                 self.write('\t'*tabs + ' '*spaces + line[i:])
 150         # end def putline
 151
 152         def reformat(self):
 153                 stack = []
 154                 while 1:
 155                         line = self.getline()
 156                         if not line: break      # EOF
 157                         # end if
 158                         m = self.endprog.match(line)
 159                         if m:
 160                                 kw = 'end'
 161                                 kw2 = m.group('kw')
 162                                 if not stack:
 163                                         self.error('unexpected end')
 164                                 elif stack[-1][0] != kw2:
 165                                         self.error('unmatched end')
 166                                 # end if
 167                                 del stack[-1:]
 168                                 self.putline(line, len(stack))
 169                                 continue
 170                         # end if
 171                         m = self.kwprog.match(line)
 172                         if m:
 173                                 kw = m.group('kw')
 174                                 if kw in start:
 175                                         self.putline(line, len(stack))
 176                                         stack.append((kw, kw))
 177                                         continue
 178                                 # end if
 179                                 if next.has_key(kw) and stack:
 180                                         self.putline(line, len(stack)-1)
 181                                         kwa, kwb = stack[-1]
 182                                         stack[-1] = kwa, kw
 183                                         continue
 184                                 # end if
 185                         # end if
 186                         self.putline(line, len(stack))
 187                 # end while
 188                 if stack:
 189                         self.error('unterminated keywords')
 190                         for kwa, kwb in stack:
 191                                 self.write('\t%s\n' % kwa)
 192                         # end for
 193                 # end if
 194         # end def reformat
 195
 196         def complete(self):
 197                 self.indentsize = 1
 198                 stack = []
 199                 todo = []
 200                 current, firstkw, lastkw, topid = 0, '', '', ''
 201                 while 1:
 202                         line = self.getline()
 203                         i = 0
 204                         m = self.wsprog.match(line)
 205                         if m: i = m.end()
 206                         # end if
 207                         m = self.endprog.match(line)
 208                         if m:
 209                                 thiskw = 'end'
 210                                 endkw = m.group('kw')
 211                                 thisid = m.group('id')
 212                         else:
 213                                 m = self.kwprog.match(line)
 214                                 if m:
 215                                         thiskw = m.group('kw')
 216                                         if not next.has_key(thiskw):
 217                                                 thiskw = ''
 218                                         # end if
 219                                         if thiskw in ('def', 'class'):
 220                                                 thisid = m.group('id')
 221                                         else:
 222                                                 thisid = ''
 223                                         # end if
 224                                 elif line[i:i+1] in ('\n', '#'):
 225                                         todo.append(line)
 226                                         continue
 227                                 else:
 228                                         thiskw = ''
 229                                 # end if
 230                         # end if
 231                         indent = len(string.expandtabs(line[:i], self.tabsize))
 232                         while indent < current:
 233                                 if firstkw:
 234                                         if topid:
 235                                                 s = '# end %s %s\n' % (
 236                                                         firstkw, topid)
 237                                         else:
 238                                                 s = '# end %s\n' % firstkw
 239                                         # end if
 240                                         self.putline(s, current)
 241                                         firstkw = lastkw = ''
 242                                 # end if
 243                                 current, firstkw, lastkw, topid = stack[-1]
 244                                 del stack[-1]
 245                         # end while
 246                         if indent == current and firstkw:
 247                                 if thiskw == 'end':
 248                                         if endkw != firstkw:
 249                                                 self.error('mismatched end')
 250                                         # end if
 251                                         firstkw = lastkw = ''
 252                                 elif not thiskw or thiskw in start:
 253                                         if topid:
 254                                                 s = '# end %s %s\n' % (
 255                                                         firstkw, topid)
 256                                         else:
 257                                                 s = '# end %s\n' % firstkw
 258                                         # end if
 259                                         self.putline(s, current)
 260                                         firstkw = lastkw = topid = ''
 261                                 # end if
 262                         # end if
 263                         if indent > current:
 264                                 stack.append((current, firstkw, lastkw, topid))
 265                                 if thiskw and thiskw not in start:
 266                                         # error
 267                                         thiskw = ''
 268                                 # end if
 269                                 current, firstkw, lastkw, topid = \
 270                                          indent, thiskw, thiskw, thisid
 271                         # end if
 272                         if thiskw:
 273                                 if thiskw in start:
 274                                         firstkw = lastkw = thiskw
 275                                         topid = thisid
 276                                 else:
 277                                         lastkw = thiskw
 278                                 # end if
 279                         # end if
 280                         for l in todo: self.write(l)
 281                         # end for
 282                         todo = []
 283                         if not line: break
 284                         # end if
 285                         self.write(line)
 286                 # end while
 287         # end def complete
 288
 289 # end class PythonIndenter
 290
 291 # Simplified user interface
 292 # - xxx_filter(input, output): read and write file objects
 293 # - xxx_string(s): take and return string object
 294 # - xxx_file(filename): process file in place, return true iff changed
 295
 296 def complete_filter(input= sys.stdin, output = sys.stdout,
 297                     stepsize = STEPSIZE, tabsize = TABSIZE):
 298         pi = PythonIndenter(input, output, stepsize, tabsize)
 299         pi.complete()
 300 # end def complete_filter
 301
 302 def reformat_filter(input = sys.stdin, output = sys.stdout,
 303                     stepsize = STEPSIZE, tabsize = TABSIZE):
 304         pi = PythonIndenter(input, output, stepsize, tabsize)
 305         pi.reformat()
 306 # end def reformat
 307
 308 class StringReader:
 309         def __init__(self, buf):
 310                 self.buf = buf
 311                 self.pos = 0
 312                 self.len = len(self.buf)
 313         # end def __init__
 314         def read(self, n = 0):
 315                 if n <= 0:
 316                         n = self.len - self.pos
 317                 else:
 318                         n = min(n, self.len - self.pos)
 319                 # end if
 320                 r = self.buf[self.pos : self.pos + n]
 321                 self.pos = self.pos + n
 322                 return r
 323         # end def read
 324         def readline(self):
 325                 i = string.find(self.buf, '\n', self.pos)
 326                 return self.read(i + 1 - self.pos)
 327         # end def readline
 328         def readlines(self):
 329                 lines = []
 330                 line = self.readline()
 331                 while line:
 332                         lines.append(line)
 333                         line = self.readline()
 334                 # end while
 335                 return lines
 336         # end def readlines
 337         # seek/tell etc. are left as an exercise for the reader
 338 # end class StringReader
 339
 340 class StringWriter:
 341         def __init__(self):
 342                 self.buf = ''
 343         # end def __init__
 344         def write(self, s):
 345                 self.buf = self.buf + s
 346         # end def write
 347         def getvalue(self):
 348                 return self.buf
 349         # end def getvalue
 350 # end class StringWriter
 351
 352 def complete_string(source, stepsize = STEPSIZE, tabsize = TABSIZE):
 353         input = StringReader(source)
 354         output = StringWriter()
 355         pi = PythonIndenter(input, output, stepsize, tabsize)
 356         pi.complete()
 357         return output.getvalue()
 358 # end def complete_string
 359
 360 def reformat_string(source, stepsize = STEPSIZE, tabsize = TABSIZE):
 361         input = StringReader(source)
 362         output = StringWriter()
 363         pi = PythonIndenter(input, output, stepsize, tabsize)
 364         pi.reformat()
 365         return output.getvalue()
 366 # end def reformat_string
 367
 368 def complete_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE):
 369         source = open(filename, 'r').read()
 370         result = complete_string(source, stepsize, tabsize)
 371         if source == result: return 0
 372         # end if
 373         import os
 374         try: os.rename(filename, filename + '~')
 375         except os.error: pass
 376         # end try
 377         f = open(filename, 'w')
 378         f.write(result)
 379         f.close()
 380         return 1
 381 # end def complete_file
 382
 383 def reformat_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE):
 384         source = open(filename, 'r').read()
 385         result = reformat_string(source, stepsize, tabsize)
 386         if source == result: return 0
 387         # end if
 388         import os
 389         os.rename(filename, filename + '~')
 390         f = open(filename, 'w')
 391         f.write(result)
 392         f.close()
 393         return 1
 394 # end def reformat_file
 395
 396 # Test program when called as a script
 397
 398 usage = """
 399 usage: pindent (-c|-r) [-s stepsize] [-t tabsize] [file] ...
 400 -c         : complete a correctly indented program (add #end directives)
 401 -r         : reformat a completed program (use #end directives)
 402 -s stepsize: indentation step (default %(STEPSIZE)d)
 403 -t tabsize : the worth in spaces of a tab (default %(TABSIZE)d)
 404 [file] ... : files are changed in place, with backups in file~
 405 If no files are specified or a single - is given,
 406 the program acts as a filter (reads stdin, writes stdout).
 407 """ % vars()
 408
 409 def test():
 410         import getopt
 411         try:
 412                 opts, args = getopt.getopt(sys.argv[1:], 'crs:t:')
 413         except getopt.error, msg:
 414                 sys.stderr.write('Error: %s\n' % msg)
 415                 sys.stderr.write(usage)
 416                 sys.exit(2)
 417         # end try
 418         action = None
 419         stepsize = STEPSIZE
 420         tabsize = TABSIZE
 421         for o, a in opts:
 422                 if o == '-c':
 423                         action = 'complete'
 424                 elif o == '-r':
 425                         action = 'reformat'
 426                 elif o == '-s':
 427                         stepsize = string.atoi(a)
 428                 elif o == '-t':
 429                         tabsize = string.atoi(a)
 430                 # end if
 431         # end for
 432         if not action:
 433                 sys.stderr.write(
 434                         'You must specify -c(omplete) or -r(eformat)\n')
 435                 sys.stderr.write(usage)
 436                 sys.exit(2)
 437         # end if
 438         if not args or args == ['-']:
 439                 action = eval(action + '_filter')
 440                 action(sys.stdin, sys.stdout, stepsize, tabsize)
 441         else:
 442                 action = eval(action + '_file')
 443                 for file in args:
 444                         action(file, stepsize, tabsize)
 445                 # end for
 446         # end if
 447 # end def test
 448
# Run the command-line driver only when executed as a script, so that
# importing this module has no side effects.
if __name__ == '__main__':
    test()
# end if