Tools/scripts/pindent.py

   1 #! /usr/local/bin/python
   2 #! /usr/local/bin/python
   3
   4 # This file contains a class and a main program that perform two
   5 # related (though complementary) formatting operations on Python
   6 # programs.  When called as "pindent -c", it takes a valid Python
   7 # program as input and outputs a version augmented with block-closing
   8 # comments.  When called as "pindent -r" it assumes its input is a
   9 # Python program with block-closing comments but with its indentation
  10 # messed up, and outputs a properly indented version.
  11
  12 # A "block-closing comment" is a comment of the form '# end <keyword>'
  13 # where <keyword> is the keyword that opened the block.  If the
  14 # opening keyword is 'def' or 'class', the function or class name may
  15 # be repeated in the block-closing comment as well.  Here is an
  16 # example of a program fully augmented with block-closing comments:
  17
  18 # def foobar(a, b):
  19 #    if a == b:
  20 #        a = a+1
  21 #    elif a < b:
  22 #        b = b-1
  23 #        if b > a: a = a-1
  24 #        # end if
  25 #    else:
  26 #        print 'oops!'
  27 #    # end if
  28 # # end def foobar
  29
  30 # Note that only the last part of an if...elif...else... block needs a
  31 # block-closing comment; the same is true for other compound
  32 # statements (e.g. try...except).  Also note that "short-form" blocks
  33 # like the second 'if' in the example must be closed as well;
  34 # otherwise the 'else' in the example would be ambiguous (remember
  35 # that indentation is not significant when interpreting block-closing
  36 # comments).
  37
  38 # Both operations are idempotent (i.e. applied to their own output
  39 # they yield an identical result).  Running first "pindent -c" and
  40 # then "pindent -r" on a valid Python program produces a program that
  41 # is semantically identical to the input (though its indentation may
  42 # be different).
  43
  44 # Other options:
  45 # -s stepsize: set the indentation step size (default 8)
  46 # -t tabsize : set the number of spaces a tab character is worth (default 8)
  47 # file ...   : input file(s) (default standard input)
  48 # The results always go to standard output
  49
  50 # Caveats:
  51 # - comments ending in a backslash will be mistaken for continued lines
  52 # - continuations using backslash are always left unchanged
  53 # - continuations inside parentheses are not extra indented by -r
  54 #   but must be indented for -c to work correctly (this breaks
  55 #   idempotency!)
  56 # - continued lines inside triple-quoted strings are totally garbled
  57
  58 # Secret feature:
  59 # - On input, a block may also be closed with an "end statement" --
  60 #   this is a block-closing comment without the '#' sign.
  61
  62 # Possible improvements:
  63 # - check syntax based on transitions in 'next' table
  64 # - better error reporting
  65 # - better error recovery
  66 # - check identifier after class/def
  67
  68 # The following wishes need a more complete tokenization of the source:
  69 # - Don't get fooled by comments ending in backslash
  70 # - reindent continuation lines indicated by backslash
  71 # - handle continuation lines inside parentheses/braces/brackets
  72 # - handle triple quoted strings spanning lines
  73 # - realign comments
  74 # - optionally do much more thorough reformatting, a la C indent
  75
# Defaults
# Columns per indentation level when no step size is given (-s option).
STEPSIZE = 8
# Columns that one tab character is worth when not overridden (-t option).
TABSIZE = 8
  79
  80 import os
  81 import regex
  82 import string
  83 import sys
  84
  85 next = {}
  86 next['if'] = next['elif'] = 'elif', 'else', 'end'
  87 next['while'] = next['for'] = 'else', 'end'
  88 next['try'] = 'except', 'finally'
  89 next['except'] = 'except', 'else', 'end'
  90 next['else'] = next['finally'] = next['def'] = next['class'] = 'end'
  91 next['end'] = ()
  92 start = 'if', 'while', 'for', 'try', 'def', 'class'
  93
  94 class PythonIndenter:
  95
  96         def __init__(self, fpi = sys.stdin, fpo = sys.stdout,
  97                      indentsize = STEPSIZE, tabsize = TABSIZE):
  98                 self.fpi = fpi
  99                 self.fpo = fpo
 100                 self.indentsize = indentsize
 101                 self.tabsize = tabsize
 102                 self.lineno = 0
 103                 self.write = fpo.write
 104                 self.kwprog = regex.symcomp(
 105                         '^[ \t]*\(<kw>[a-z]+\)'
 106                         '\([ \t]+\(<id>[a-zA-Z_][a-zA-Z0-9_]*\)\)?'
 107                         '[^a-zA-Z0-9_]')
 108                 self.endprog = regex.symcomp(
 109                         '^[ \t]*#?[ \t]*end[ \t]+\(<kw>[a-z]+\)'
 110                         '\([ \t]+\(<id>[a-zA-Z_][a-zA-Z0-9_]*\)\)?'
 111                         '[^a-zA-Z0-9_]')
 112                 self.wsprog = regex.compile('^[ \t]*')
 113         # end def __init__
 114
 115         def readline(self):
 116                 line = self.fpi.readline()
 117                 if line: self.lineno = self.lineno + 1
 118                 # end if
 119                 return line
 120         # end def readline
 121
 122         def error(self, fmt, *args):
 123                 if args: fmt = fmt % args
 124                 # end if
 125                 sys.stderr.write('Error at line %d: %s\n' % (self.lineno, fmt))
 126                 self.write('### %s ###\n' % fmt)
 127         # end def error
 128
 129         def getline(self):
 130                 line = self.readline()
 131                 while line[-2:] == '\\\n':
 132                         line2 = self.readline()
 133                         if not line2: break
 134                         # end if
 135                         line = line + line2
 136                 # end while
 137                 return line
 138         # end def getline
 139
 140         def putline(self, line, indent = None):
 141                 if indent is None:
 142                         self.write(line)
 143                         return
 144                 # end if
 145                 tabs, spaces = divmod(indent*self.indentsize, self.tabsize)
 146                 i = max(0, self.wsprog.match(line))
 147                 self.write('\t'*tabs + ' '*spaces + line[i:])
 148         # end def putline
 149
 150         def reformat(self):
 151                 stack = []
 152                 while 1:
 153                         line = self.getline()
 154                         if not line: break      # EOF
 155                         # end if
 156                         if self.endprog.match(line) >= 0:
 157                                 kw = 'end'
 158                                 kw2 = self.endprog.group('kw')
 159                                 if not stack:
 160                                         self.error('unexpected end')
 161                                 elif stack[-1][0] != kw2:
 162                                         self.error('unmatched end')
 163                                 # end if
 164                                 del stack[-1:]
 165                                 self.putline(line, len(stack))
 166                                 continue
 167                         # end if
 168                         if self.kwprog.match(line) >= 0:
 169                                 kw = self.kwprog.group('kw')
 170                                 if kw in start:
 171                                         self.putline(line, len(stack))
 172                                         stack.append((kw, kw))
 173                                         continue
 174                                 # end if
 175                                 if next.has_key(kw) and stack:
 176                                         self.putline(line, len(stack)-1)
 177                                         kwa, kwb = stack[-1]
 178                                         stack[-1] = kwa, kw
 179                                         continue
 180                                 # end if
 181                         # end if
 182                         self.putline(line, len(stack))
 183                 # end while
 184                 if stack:
 185                         self.error('unterminated keywords')
 186                         for kwa, kwb in stack:
 187                                 self.write('\t%s\n' % kwa)
 188                         # end for
 189                 # end if
 190         # end def reformat
 191
 192         def complete(self):
 193                 self.indentsize = 1
 194                 stack = []
 195                 todo = []
 196                 current, firstkw, lastkw, topid = 0, '', '', ''
 197                 while 1:
 198                         line = self.getline()
 199                         i = max(0, self.wsprog.match(line))
 200                         if self.endprog.match(line) >= 0:
 201                                 thiskw = 'end'
 202                                 endkw = self.endprog.group('kw')
 203                                 thisid = self.endprog.group('id')
 204                         elif self.kwprog.match(line) >= 0:
 205                                 thiskw = self.kwprog.group('kw')
 206                                 if not next.has_key(thiskw):
 207                                         thiskw = ''
 208                                 # end if
 209                                 if thiskw in ('def', 'class'):
 210                                         thisid = self.kwprog.group('id')
 211                                 else:
 212                                         thisid = ''
 213                                 # end if
 214                         elif line[i:i+1] in ('\n', '#'):
 215                                 todo.append(line)
 216                                 continue
 217                         else:
 218                                 thiskw = ''
 219                         # end if
 220                         indent = len(string.expandtabs(line[:i], self.tabsize))
 221                         while indent < current:
 222                                 if firstkw:
 223                                         if topid:
 224                                                 s = '# end %s %s\n' % (
 225                                                         firstkw, topid)
 226                                         else:
 227                                                 s = '# end %s\n' % firstkw
 228                                         # end if
 229                                         self.putline(s, current)
 230                                         firstkw = lastkw = ''
 231                                 # end if
 232                                 current, firstkw, lastkw, topid = stack[-1]
 233                                 del stack[-1]
 234                         # end while
 235                         if indent == current and firstkw:
 236                                 if thiskw == 'end':
 237                                         if endkw != firstkw:
 238                                                 self.error('mismatched end')
 239                                         # end if
 240                                         firstkw = lastkw = ''
 241                                 elif not thiskw or thiskw in start:
 242                                         if topid:
 243                                                 s = '# end %s %s\n' % (
 244                                                         firstkw, topid)
 245                                         else:
 246                                                 s = '# end %s\n' % firstkw
 247                                         # end if
 248                                         self.putline(s, current)
 249                                         firstkw = lastkw = topid = ''
 250                                 # end if
 251                         # end if
 252                         if indent > current:
 253                                 stack.append(current, firstkw, lastkw, topid)
 254                                 if thiskw and thiskw not in start:
 255                                         # error
 256                                         thiskw = ''
 257                                 # end if
 258                                 current, firstkw, lastkw, topid = \
 259                                          indent, thiskw, thiskw, thisid
 260                         # end if
 261                         if thiskw:
 262                                 if thiskw in start:
 263                                         firstkw = lastkw = thiskw
 264                                         topid = thisid
 265                                 else:
 266                                         lastkw = thiskw
 267                                 # end if
 268                         # end if
 269                         for l in todo: self.write(l)
 270                         # end for
 271                         todo = []
 272                         if not line: break
 273                         # end if
 274                         self.write(line)
 275                 # end while
 276         # end def complete
 277
 278 # end class PythonIndenter
 279
 280 # Simplified user interface
 281 # - xxx_filter(input, output): read and write file objects
 282 # - xxx_string(s): take and return string object
 283 # - xxx_file(filename): process file in place, return true iff changed
 284
 285 def complete_filter(input= sys.stdin, output = sys.stdout,
 286                     stepsize = STEPSIZE, tabsize = TABSIZE):
 287         pi = PythonIndenter(input, output, stepsize, tabsize)
 288         pi.complete()
 289 # end def complete_filter
 290
 291 def reformat_filter(input = sys.stdin, output = sys.stdout,
 292                     stepsize = STEPSIZE, tabsize = TABSIZE):
 293         pi = PythonIndenter(input, output, stepsize, tabsize)
 294         pi.reformat()
 295 # end def reformat
 296
 297 class StringReader:
 298         def __init__(self, buf):
 299                 self.buf = buf
 300                 self.pos = 0
 301                 self.len = len(self.buf)
 302         # end def __init__
 303         def read(self, n = 0):
 304                 if n <= 0:
 305                         n = self.len - self.pos
 306                 else:
 307                         n = min(n, self.len - self.pos)
 308                 # end if
 309                 r = self.buf[self.pos : self.pos + n]
 310                 self.pos = self.pos + n
 311                 return r
 312         # end def read
 313         def readline(self):
 314                 i = string.find(self.buf, '\n', self.pos)
 315                 return self.read(i + 1 - self.pos)
 316         # end def readline
 317         def readlines(self):
 318                 lines = []
 319                 line = self.readline()
 320                 while line:
 321                         lines.append(line)
 322                         line = self.readline()
 323                 # end while
 324                 return lines
 325         # end def readlines
 326         # seek/tell etc. are left as an exercise for the reader
 327 # end class StringReader
 328
 329 class StringWriter:
 330         def __init__(self):
 331                 self.buf = ''
 332         # end def __init__
 333         def write(self, s):
 334                 self.buf = self.buf + s
 335         # end def write
 336         def getvalue(self):
 337                 return self.buf
 338         # end def getvalue
 339 # end class StringWriter
 340
 341 def complete_string(source, stepsize = STEPSIZE, tabsize = TABSIZE):
 342         input = StringReader(source)
 343         output = StringWriter()
 344         pi = PythonIndenter(input, output, stepsize, tabsize)
 345         pi.complete()
 346         return output.getvalue()
 347 # end def complete_string
 348
 349 def reformat_string(source, stepsize = STEPSIZE, tabsize = TABSIZE):
 350         input = StringReader(source)
 351         output = StringWriter()
 352         pi = PythonIndenter(input, output, stepsize, tabsize)
 353         pi.reformat()
 354         return output.getvalue()
 355 # end def reformat_string
 356
 357 def complete_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE):
 358         source = open(filename, 'r').read()
 359         result = complete_string(source, stepsize, tabsize)
 360         if source == result: return 0
 361         # end if
 362         import os
 363         try: os.rename(filename, filename + '~')
 364         except os.error: pass
 365         # end try
 366         f = open(filename, 'w')
 367         f.write(result)
 368         f.close()
 369         return 1
 370 # end def complete_file
 371
 372 def reformat_file(filename, stepsize = STEPSIZE, tabsize = TABSIZE):
 373         source = open(filename, 'r').read()
 374         result = reformat_string(source, stepsize, tabsize)
 375         if source == result: return 0
 376         # end if
 377         import os
 378         os.rename(filename, filename + '~')
 379         f = open(filename, 'w')
 380         f.write(result)
 381         f.close()
 382         return 1
 383 # end def reformat_file
 384
 385 # Test program when called as a script
 386
 387 usage = """
 388 usage: pindent (-c|-r) [-s stepsize] [-t tabsize] [file] ...
 389 -c         : complete a correctly indented program (add #end directives)
 390 -r         : reformat a completed program (use #end directives)
 391 -s stepsize: indentation step (default %(STEPSIZE)d)
 392 -t tabsize : the worth in spaces of a tab (default %(TABSIZE)d)
 393 [file] ... : files are changed in place, with backups in file~
 394 If no files are specified or a single - is given,
 395 the program acts as a filter (reads stdin, writes stdout).
 396 """ % vars()
 397
 398 def test():
 399         import getopt
 400         try:
 401                 opts, args = getopt.getopt(sys.argv[1:], 'crs:t:')
 402         except getopt.error, msg:
 403                 sys.stderr.write('Error: %s\n' % msg)
 404                 sys.stderr.write(usage)
 405                 sys.exit(2)
 406         # end try
 407         action = None
 408         stepsize = STEPSIZE
 409         tabsize = TABSIZE
 410         for o, a in opts:
 411                 if o == '-c':
 412                         action = 'complete'
 413                 elif o == '-r':
 414                         action = 'reformat'
 415                 elif o == '-s':
 416                         stepsize = string.atoi(a)
 417                 elif o == '-t':
 418                         tabsize = string.atoi(a)
 419                 # end if
 420         # end for
 421         if not action:
 422                 sys.stderr.write(
 423                         'You must specify -c(omplete) or -r(eformat)\n')
 424                 sys.stderr.write(usage)
 425                 sys.exit(2)
 426         # end if
 427         if not args or args == ['-']:
 428                 action = eval(action + '_filter')
 429                 action(sys.stdin, sys.stdout, stepsize, tabsize)
 430         else:
 431                 action = eval(action + '_file')
 432                 for file in args:
 433                         action(file, stepsize, tabsize)
 434                 # end for
 435         # end if
 436 # end def test
 437
 438 if __name__ == '__main__':
 439         test()
 440 # end if