third_party/pycoverage/coverage/parser.py

   1 """Code parsing for Coverage."""
   2
   3 import dis, re, sys, token, tokenize
   4
   5 from coverage.backward import set, sorted, StringIO # pylint: disable=W0622
   6 from coverage.backward import open_source, range    # pylint: disable=W0622
   7 from coverage.backward import reversed              # pylint: disable=W0622
   8 from coverage.backward import bytes_to_ints
   9 from coverage.bytecode import ByteCodes, CodeObjects
  10 from coverage.misc import nice_pair, expensive, join_regex
  11 from coverage.misc import CoverageException, NoSource, NotPython
  12
  13
  14 class CodeParser(object):
  15     """Parse code to find executable lines, excluded lines, etc."""
  16
  17     def __init__(self, text=None, filename=None, exclude=None):
  18         """
  19         Source can be provided as `text`, the text itself, or `filename`, from
  20         which the text will be read.  Excluded lines are those that match
  21         `exclude`, a regex.
  22
  23         """
  24         assert text or filename, "CodeParser needs either text or filename"
  25         self.filename = filename or "<code>"
  26         self.text = text
  27         if not self.text:
  28             try:
  29                 sourcef = open_source(self.filename)
  30                 try:
  31                     self.text = sourcef.read()
  32                 finally:
  33                     sourcef.close()
  34             except IOError:
  35                 _, err, _ = sys.exc_info()
  36                 raise NoSource(
  37                     "No source for code: '%s': %s" % (self.filename, err)
  38                     )
  39
  40         # Scrap the BOM if it exists.
  41         if self.text and ord(self.text[0]) == 0xfeff:
  42             self.text = self.text[1:]
  43
  44         self.exclude = exclude
  45
  46         self.show_tokens = False
  47
  48         # The text lines of the parsed code.
  49         self.lines = self.text.split('\n')
  50
  51         # The line numbers of excluded lines of code.
  52         self.excluded = set()
  53
  54         # The line numbers of docstring lines.
  55         self.docstrings = set()
  56
  57         # The line numbers of class definitions.
  58         self.classdefs = set()
  59
  60         # A dict mapping line numbers to (lo,hi) for multi-line statements.
  61         self.multiline = {}
  62
  63         # The line numbers that start statements.
  64         self.statement_starts = set()
  65
  66         # Lazily-created ByteParser
  67         self._byte_parser = None
  68
  69     def _get_byte_parser(self):
  70         """Create a ByteParser on demand."""
  71         if not self._byte_parser:
  72             self._byte_parser = \
  73                             ByteParser(text=self.text, filename=self.filename)
  74         return self._byte_parser
  75     byte_parser = property(_get_byte_parser)
  76
  77     def lines_matching(self, *regexes):
  78         """Find the lines matching one of a list of regexes.
  79
  80         Returns a set of line numbers, the lines that contain a match for one
  81         of the regexes in `regexes`.  The entire line needn't match, just a
  82         part of it.
  83
  84         """
  85         regex_c = re.compile(join_regex(regexes))
  86         matches = set()
  87         for i, ltext in enumerate(self.lines):
  88             if regex_c.search(ltext):
  89                 matches.add(i+1)
  90         return matches
  91
  92     def _raw_parse(self):
  93         """Parse the source to find the interesting facts about its lines.
  94
  95         A handful of member fields are updated.
  96
  97         """
  98         # Find lines which match an exclusion pattern.
  99         if self.exclude:
 100             self.excluded = self.lines_matching(self.exclude)
 101
 102         # Tokenize, to find excluded suites, to find docstrings, and to find
 103         # multi-line statements.
 104         indent = 0
 105         exclude_indent = 0
 106         excluding = False
 107         prev_toktype = token.INDENT
 108         first_line = None
 109         empty = True
 110
 111         tokgen = generate_tokens(self.text)
 112         for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
 113             if self.show_tokens:                # pragma: not covered
 114                 print("%10s %5s %-20r %r" % (
 115                     tokenize.tok_name.get(toktype, toktype),
 116                     nice_pair((slineno, elineno)), ttext, ltext
 117                     ))
 118             if toktype == token.INDENT:
 119                 indent += 1
 120             elif toktype == token.DEDENT:
 121                 indent -= 1
 122             elif toktype == token.NAME and ttext == 'class':
 123                 # Class definitions look like branches in the byte code, so
 124                 # we need to exclude them.  The simplest way is to note the
 125                 # lines with the 'class' keyword.
 126                 self.classdefs.add(slineno)
 127             elif toktype == token.OP and ttext == ':':
 128                 if not excluding and elineno in self.excluded:
 129                     # Start excluding a suite.  We trigger off of the colon
 130                     # token so that the #pragma comment will be recognized on
 131                     # the same line as the colon.
 132                     exclude_indent = indent
 133                     excluding = True
 134             elif toktype == token.STRING and prev_toktype == token.INDENT:
 135                 # Strings that are first on an indented line are docstrings.
 136                 # (a trick from trace.py in the stdlib.) This works for
 137                 # 99.9999% of cases.  For the rest (!) see:
 138                 # http://stackoverflow.com/questions/1769332/x/1769794#1769794
 139                 self.docstrings.update(range(slineno, elineno+1))
 140             elif toktype == token.NEWLINE:
 141                 if first_line is not None and elineno != first_line:
 142                     # We're at the end of a line, and we've ended on a
 143                     # different line than the first line of the statement,
 144                     # so record a multi-line range.
 145                     rng = (first_line, elineno)
 146                     for l in range(first_line, elineno+1):
 147                         self.multiline[l] = rng
 148                 first_line = None
 149
 150             if ttext.strip() and toktype != tokenize.COMMENT:
 151                 # A non-whitespace token.
 152                 empty = False
 153                 if first_line is None:
 154                     # The token is not whitespace, and is the first in a
 155                     # statement.
 156                     first_line = slineno
 157                     # Check whether to end an excluded suite.
 158                     if excluding and indent <= exclude_indent:
 159                         excluding = False
 160                     if excluding:
 161                         self.excluded.add(elineno)
 162
 163             prev_toktype = toktype
 164
 165         # Find the starts of the executable statements.
 166         if not empty:
 167             self.statement_starts.update(self.byte_parser._find_statements())
 168
 169     def first_line(self, line):
 170         """Return the first line number of the statement including `line`."""
 171         rng = self.multiline.get(line)
 172         if rng:
 173             first_line = rng[0]
 174         else:
 175             first_line = line
 176         return first_line
 177
 178     def first_lines(self, lines, *ignores):
 179         """Map the line numbers in `lines` to the correct first line of the
 180         statement.
 181
 182         Skip any line mentioned in any of the sequences in `ignores`.
 183
 184         Returns a set of the first lines.
 185
 186         """
 187         ignore = set()
 188         for ign in ignores:
 189             ignore.update(ign)
 190         lset = set()
 191         for l in lines:
 192             if l in ignore:
 193                 continue
 194             new_l = self.first_line(l)
 195             if new_l not in ignore:
 196                 lset.add(new_l)
 197         return lset
 198
 199     def parse_source(self):
 200         """Parse source text to find executable lines, excluded lines, etc.
 201
 202         Return values are 1) a set of executable line numbers, and 2) a set of
 203         excluded line numbers.
 204
 205         Reported line numbers are normalized to the first line of multi-line
 206         statements.
 207
 208         """
 209         try:
 210             self._raw_parse()
 211         except (tokenize.TokenError, IndentationError):
 212             _, tokerr, _ = sys.exc_info()
 213             msg, lineno = tokerr.args
 214             raise NotPython(
 215                 "Couldn't parse '%s' as Python source: '%s' at %s" %
 216                     (self.filename, msg, lineno)
 217                 )
 218
 219         excluded_lines = self.first_lines(self.excluded)
 220         lines = self.first_lines(
 221             self.statement_starts,
 222             excluded_lines,
 223             self.docstrings
 224         )
 225
 226         return lines, excluded_lines
 227
 228     def arcs(self):
 229         """Get information about the arcs available in the code.
 230
 231         Returns a sorted list of line number pairs.  Line numbers have been
 232         normalized to the first line of multiline statements.
 233
 234         """
 235         all_arcs = []
 236         for l1, l2 in self.byte_parser._all_arcs():
 237             fl1 = self.first_line(l1)
 238             fl2 = self.first_line(l2)
 239             if fl1 != fl2:
 240                 all_arcs.append((fl1, fl2))
 241         return sorted(all_arcs)
 242     arcs = expensive(arcs)
 243
 244     def exit_counts(self):
 245         """Get a mapping from line numbers to count of exits from that line.
 246
 247         Excluded lines are excluded.
 248
 249         """
 250         excluded_lines = self.first_lines(self.excluded)
 251         exit_counts = {}
 252         for l1, l2 in self.arcs():
 253             if l1 < 0:
 254                 # Don't ever report -1 as a line number
 255                 continue
 256             if l1 in excluded_lines:
 257                 # Don't report excluded lines as line numbers.
 258                 continue
 259             if l2 in excluded_lines:
 260                 # Arcs to excluded lines shouldn't count.
 261                 continue
 262             if l1 not in exit_counts:
 263                 exit_counts[l1] = 0
 264             exit_counts[l1] += 1
 265
 266         # Class definitions have one extra exit, so remove one for each:
 267         for l in self.classdefs:
 268             # Ensure key is there: classdefs can include excluded lines.
 269             if l in exit_counts:
 270                 exit_counts[l] -= 1
 271
 272         return exit_counts
 273     exit_counts = expensive(exit_counts)
 274
 275
 276 ## Opcodes that guide the ByteParser.
 277
 278 def _opcode(name):
 279     """Return the opcode by name from the dis module."""
 280     return dis.opmap[name]
 281
 282 def _opcode_set(*names):
 283     """Return a set of opcodes by the names in `names`."""
 284     s = set()
 285     for name in names:
 286         try:
 287             s.add(_opcode(name))
 288         except KeyError:
 289             pass
 290     return s
 291
 292 # Opcodes that leave the code object.
 293 OPS_CODE_END = _opcode_set('RETURN_VALUE')
 294
 295 # Opcodes that unconditionally end the code chunk.
 296 OPS_CHUNK_END = _opcode_set(
 297     'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'RETURN_VALUE', 'RAISE_VARARGS',
 298     'BREAK_LOOP', 'CONTINUE_LOOP',
 299     )
 300
 301 # Opcodes that unconditionally begin a new code chunk.  By starting new chunks
 302 # with unconditional jump instructions, we neatly deal with jumps to jumps
 303 # properly.
 304 OPS_CHUNK_BEGIN = _opcode_set('JUMP_ABSOLUTE', 'JUMP_FORWARD')
 305
 306 # Opcodes that push a block on the block stack.
 307 OPS_PUSH_BLOCK = _opcode_set(
 308     'SETUP_LOOP', 'SETUP_EXCEPT', 'SETUP_FINALLY', 'SETUP_WITH'
 309     )
 310
 311 # Block types for exception handling.
 312 OPS_EXCEPT_BLOCKS = _opcode_set('SETUP_EXCEPT', 'SETUP_FINALLY')
 313
 314 # Opcodes that pop a block from the block stack.
 315 OPS_POP_BLOCK = _opcode_set('POP_BLOCK')
 316
 317 # Opcodes that have a jump destination, but aren't really a jump.
 318 OPS_NO_JUMP = OPS_PUSH_BLOCK
 319
 320 # Individual opcodes we need below.
 321 OP_BREAK_LOOP = _opcode('BREAK_LOOP')
 322 OP_END_FINALLY = _opcode('END_FINALLY')
 323 OP_COMPARE_OP = _opcode('COMPARE_OP')
 324 COMPARE_EXCEPTION = 10  # just have to get this const from the code.
 325 OP_LOAD_CONST = _opcode('LOAD_CONST')
 326 OP_RETURN_VALUE = _opcode('RETURN_VALUE')
 327
 328
 329 class ByteParser(object):
 330     """Parse byte codes to understand the structure of code."""
 331
 332     def __init__(self, code=None, text=None, filename=None):
 333         if code:
 334             self.code = code
 335             self.text = text
 336         else:
 337             if not text:
 338                 assert filename, "If no code or text, need a filename"
 339                 sourcef = open_source(filename)
 340                 try:
 341                     text = sourcef.read()
 342                 finally:
 343                     sourcef.close()
 344             self.text = text
 345
 346             try:
 347                 # Python 2.3 and 2.4 don't like partial last lines, so be sure
 348                 # the text ends nicely for them.
 349                 self.code = compile(text + '\n', filename, "exec")
 350             except SyntaxError:
 351                 _, synerr, _ = sys.exc_info()
 352                 raise NotPython(
 353                     "Couldn't parse '%s' as Python source: '%s' at line %d" %
 354                         (filename, synerr.msg, synerr.lineno)
 355                     )
 356
 357         # Alternative Python implementations don't always provide all the
 358         # attributes on code objects that we need to do the analysis.
 359         for attr in ['co_lnotab', 'co_firstlineno', 'co_consts', 'co_code']:
 360             if not hasattr(self.code, attr):
 361                 raise CoverageException(
 362                     "This implementation of Python doesn't support code "
 363                     "analysis.\n"
 364                     "Run coverage.py under CPython for this command."
 365                     )
 366
 367     def child_parsers(self):
 368         """Iterate over all the code objects nested within this one.
 369
 370         The iteration includes `self` as its first value.
 371
 372         """
 373         children = CodeObjects(self.code)
 374         return [ByteParser(code=c, text=self.text) for c in children]
 375
 376     def _bytes_lines(self):
 377         """Map byte offsets to line numbers in `code`.
 378
 379         Uses co_lnotab described in Python/compile.c to map byte offsets to
 380         line numbers.  Produces a sequence: (b0, l0), (b1, l1), ...
 381
 382         Only byte offsets that correspond to line numbers are included in the
 383         results.
 384
 385         """
 386         # Adapted from dis.py in the standard library.
 387         byte_increments = bytes_to_ints(self.code.co_lnotab[0::2])
 388         line_increments = bytes_to_ints(self.code.co_lnotab[1::2])
 389
 390         last_line_num = None
 391         line_num = self.code.co_firstlineno
 392         byte_num = 0
 393         for byte_incr, line_incr in zip(byte_increments, line_increments):
 394             if byte_incr:
 395                 if line_num != last_line_num:
 396                     yield (byte_num, line_num)
 397                     last_line_num = line_num
 398                 byte_num += byte_incr
 399             line_num += line_incr
 400         if line_num != last_line_num:
 401             yield (byte_num, line_num)
 402
 403     def _find_statements(self):
 404         """Find the statements in `self.code`.
 405
 406         Produce a sequence of line numbers that start statements.  Recurses
 407         into all code objects reachable from `self.code`.
 408
 409         """
 410         for bp in self.child_parsers():
 411             # Get all of the lineno information from this code.
 412             for _, l in bp._bytes_lines():
 413                 yield l
 414
 415     def _block_stack_repr(self, block_stack):
 416         """Get a string version of `block_stack`, for debugging."""
 417         blocks = ", ".join(
 418             ["(%s, %r)" % (dis.opname[b[0]], b[1]) for b in block_stack]
 419         )
 420         return "[" + blocks + "]"
 421
 422     def _split_into_chunks(self):
 423         """Split the code object into a list of `Chunk` objects.
 424
 425         Each chunk is only entered at its first instruction, though there can
 426         be many exits from a chunk.
 427
 428         Returns a list of `Chunk` objects.
 429
 430         """
 431         # The list of chunks so far, and the one we're working on.
 432         chunks = []
 433         chunk = None
 434
 435         # A dict mapping byte offsets of line starts to the line numbers.
 436         bytes_lines_map = dict(self._bytes_lines())
 437
 438         # The block stack: loops and try blocks get pushed here for the
 439         # implicit jumps that can occur.
 440         # Each entry is a tuple: (block type, destination)
 441         block_stack = []
 442
 443         # Some op codes are followed by branches that should be ignored.  This
 444         # is a count of how many ignores are left.
 445         ignore_branch = 0
 446
 447         # We have to handle the last two bytecodes specially.
 448         ult = penult = None
 449
 450         # Get a set of all of the jump-to points.
 451         jump_to = set()
 452         bytecodes = list(ByteCodes(self.code.co_code))
 453         for bc in bytecodes:
 454             if bc.jump_to >= 0:
 455                 jump_to.add(bc.jump_to)
 456
 457         chunk_lineno = 0
 458
 459         # Walk the byte codes building chunks.
 460         for bc in bytecodes:
 461             # Maybe have to start a new chunk
 462             start_new_chunk = False
 463             first_chunk = False
 464             if bc.offset in bytes_lines_map:
 465                 # Start a new chunk for each source line number.
 466                 start_new_chunk = True
 467                 chunk_lineno = bytes_lines_map[bc.offset]
 468                 first_chunk = True
 469             elif bc.offset in jump_to:
 470                 # To make chunks have a single entrance, we have to make a new
 471                 # chunk when we get to a place some bytecode jumps to.
 472                 start_new_chunk = True
 473             elif bc.op in OPS_CHUNK_BEGIN:
 474                 # Jumps deserve their own unnumbered chunk.  This fixes
 475                 # problems with jumps to jumps getting confused.
 476                 start_new_chunk = True
 477
 478             if not chunk or start_new_chunk:
 479                 if chunk:
 480                     chunk.exits.add(bc.offset)
 481                 chunk = Chunk(bc.offset, chunk_lineno, first_chunk)
 482                 chunks.append(chunk)
 483
 484             # Look at the opcode
 485             if bc.jump_to >= 0 and bc.op not in OPS_NO_JUMP:
 486                 if ignore_branch:
 487                     # Someone earlier wanted us to ignore this branch.
 488                     ignore_branch -= 1
 489                 else:
 490                     # The opcode has a jump, it's an exit for this chunk.
 491                     chunk.exits.add(bc.jump_to)
 492
 493             if bc.op in OPS_CODE_END:
 494                 # The opcode can exit the code object.
 495                 chunk.exits.add(-self.code.co_firstlineno)
 496             if bc.op in OPS_PUSH_BLOCK:
 497                 # The opcode adds a block to the block_stack.
 498                 block_stack.append((bc.op, bc.jump_to))
 499             if bc.op in OPS_POP_BLOCK:
 500                 # The opcode pops a block from the block stack.
 501                 block_stack.pop()
 502             if bc.op in OPS_CHUNK_END:
 503                 # This opcode forces the end of the chunk.
 504                 if bc.op == OP_BREAK_LOOP:
 505                     # A break is implicit: jump where the top of the
 506                     # block_stack points.
 507                     chunk.exits.add(block_stack[-1][1])
 508                 chunk = None
 509             if bc.op == OP_END_FINALLY:
 510                 # For the finally clause we need to find the closest exception
 511                 # block, and use its jump target as an exit.
 512                 for block in reversed(block_stack):
 513                     if block[0] in OPS_EXCEPT_BLOCKS:
 514                         chunk.exits.add(block[1])
 515                         break
 516             if bc.op == OP_COMPARE_OP and bc.arg == COMPARE_EXCEPTION:
 517                 # This is an except clause.  We want to overlook the next
 518                 # branch, so that except's don't count as branches.
 519                 ignore_branch += 1
 520
 521             penult = ult
 522             ult = bc
 523
 524         if chunks:
 525             # The last two bytecodes could be a dummy "return None" that
 526             # shouldn't be counted as real code. Every Python code object seems
 527             # to end with a return, and a "return None" is inserted if there
 528             # isn't an explicit return in the source.
 529             if ult and penult:
 530                 if penult.op == OP_LOAD_CONST and ult.op == OP_RETURN_VALUE:
 531                     if self.code.co_consts[penult.arg] is None:
 532                         # This is "return None", but is it dummy?  A real line
 533                         # would be a last chunk all by itself.
 534                         if chunks[-1].byte != penult.offset:
 535                             ex = -self.code.co_firstlineno
 536                             # Split the last chunk
 537                             last_chunk = chunks[-1]
 538                             last_chunk.exits.remove(ex)
 539                             last_chunk.exits.add(penult.offset)
 540                             chunk = Chunk(
 541                                 penult.offset, last_chunk.line, False
 542                             )
 543                             chunk.exits.add(ex)
 544                             chunks.append(chunk)
 545
 546             # Give all the chunks a length.
 547             chunks[-1].length = bc.next_offset - chunks[-1].byte # pylint: disable=W0631,C0301
 548             for i in range(len(chunks)-1):
 549                 chunks[i].length = chunks[i+1].byte - chunks[i].byte
 550
 551         #self.validate_chunks(chunks)
 552         return chunks
 553
 554     def validate_chunks(self, chunks):
 555         """Validate the rule that chunks have a single entrance."""
 556         # starts is the entrances to the chunks
 557         starts = set([ch.byte for ch in chunks])
 558         for ch in chunks:
 559             assert all([(ex in starts or ex < 0) for ex in ch.exits])
 560
 561     def _arcs(self):
 562         """Find the executable arcs in the code.
 563
 564         Yields pairs: (from,to).  From and to are integer line numbers.  If
 565         from is < 0, then the arc is an entrance into the code object.  If to
 566         is < 0, the arc is an exit from the code object.
 567
 568         """
 569         chunks = self._split_into_chunks()
 570
 571         # A map from byte offsets to chunks jumped into.
 572         byte_chunks = dict([(c.byte, c) for c in chunks])
 573
 574         # There's always an entrance at the first chunk.
 575         yield (-1, byte_chunks[0].line)
 576
 577         # Traverse from the first chunk in each line, and yield arcs where
 578         # the trace function will be invoked.
 579         for chunk in chunks:
 580             if not chunk.first:
 581                 continue
 582
 583             chunks_considered = set()
 584             chunks_to_consider = [chunk]
 585             while chunks_to_consider:
 586                 # Get the chunk we're considering, and make sure we don't
 587                 # consider it again
 588                 this_chunk = chunks_to_consider.pop()
 589                 chunks_considered.add(this_chunk)
 590
 591                 # For each exit, add the line number if the trace function
 592                 # would be triggered, or add the chunk to those being
 593                 # considered if not.
 594                 for ex in this_chunk.exits:
 595                     if ex < 0:
 596                         yield (chunk.line, ex)
 597                     else:
 598                         next_chunk = byte_chunks[ex]
 599                         if next_chunk in chunks_considered:
 600                             continue
 601
 602                         # The trace function is invoked if visiting the first
 603                         # bytecode in a line, or if the transition is a
 604                         # backward jump.
 605                         backward_jump = next_chunk.byte < this_chunk.byte
 606                         if next_chunk.first or backward_jump:
 607                             if next_chunk.line != chunk.line:
 608                                 yield (chunk.line, next_chunk.line)
 609                         else:
 610                             chunks_to_consider.append(next_chunk)
 611
 612     def _all_chunks(self):
 613         """Returns a list of `Chunk` objects for this code and its children.
 614
 615         See `_split_into_chunks` for details.
 616
 617         """
 618         chunks = []
 619         for bp in self.child_parsers():
 620             chunks.extend(bp._split_into_chunks())
 621
 622         return chunks
 623
 624     def _all_arcs(self):
 625         """Get the set of all arcs in this code object and its children.
 626
 627         See `_arcs` for details.
 628
 629         """
 630         arcs = set()
 631         for bp in self.child_parsers():
 632             arcs.update(bp._arcs())
 633
 634         return arcs
 635
 636
 637 class Chunk(object):
 638     """A sequence of byte codes with a single entrance.
 639
 640     To analyze byte code, we have to divide it into chunks, sequences of byte
 641     codes such that each chunk has only one entrance, the first instruction in
 642     the block.
 643
 644     This is almost the CS concept of `basic block`_, except that we're willing
 645     to have many exits from a chunk, and "basic block" is a more cumbersome
 646     term.
 647
 648     .. _basic block: http://en.wikipedia.org/wiki/Basic_block
 649
 650     `line` is the source line number containing this chunk.
 651
 652     `first` is true if this is the first chunk in the source line.
 653
 654     An exit < 0 means the chunk can leave the code (return).  The exit is
 655     the negative of the starting line number of the code block.
 656
 657     """
 658     def __init__(self, byte, line, first):
 659         self.byte = byte
 660         self.line = line
 661         self.first = first
 662         self.length = 0
 663         self.exits = set()
 664
 665     def __repr__(self):
 666         if self.first:
 667             bang = "!"
 668         else:
 669             bang = ""
 670         return "<%d+%d @%d%s %r>" % (
 671             self.byte, self.length, self.line, bang, list(self.exits)
 672             )
 673
 674
 675 class CachedTokenizer(object):
 676     """A one-element cache around tokenize.generate_tokens.
 677
 678     When reporting, coverage.py tokenizes files twice, once to find the
 679     structure of the file, and once to syntax-color it.  Tokenizing is
 680     expensive, and easily cached.
 681
 682     This is a one-element cache so that our twice-in-a-row tokenizing doesn't
 683     actually tokenize twice.
 684
 685     """
 686     def __init__(self):
 687         self.last_text = None
 688         self.last_tokens = None
 689
 690     def generate_tokens(self, text):
 691         """A stand-in for `tokenize.generate_tokens`."""
 692         if text != self.last_text:
 693             self.last_text = text
 694             self.last_tokens = list(
 695                 tokenize.generate_tokens(StringIO(text).readline)
 696             )
 697         return self.last_tokens
 698
 699 # Create our generate_tokens cache as a callable replacement function.
 700 generate_tokens = CachedTokenizer().generate_tokens