tools/telemetry/catapult_base/refactor/offset_token.py
# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import collections
import itertools
import token
import tokenize


def _Pairwise(iterable):
  """s -> (None, s0), (s0, s1), (s1, s2), (s2, s3), ..."""
  a, b = itertools.tee(iterable)
  a = itertools.chain((None,), a)
  return itertools.izip(a, b)
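
# Illustrative sketch of _Pairwise (not executed here): each element is
# paired with its predecessor, and the first element is paired with None.
#
#   list(_Pairwise('abc')) == [(None, 'a'), ('a', 'b'), ('b', 'c')]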


class OffsetToken(object):
  """A Python token with a relative position.

  A token is represented by a type defined in Python's token module, a string
  representing the content, and an offset. Using relative positions makes it
  easy to insert and remove tokens.
  """

  def __init__(self, token_type, string, offset):
    self._type = token_type
    self._string = string
    self._offset = offset

  @property
  def type(self):
    return self._type

  @property
  def type_name(self):
    return token.tok_name[self._type]

  @property
  def string(self):
    return self._string

  @string.setter
  def string(self, value):
    self._string = value

  @property
  def offset(self):
    return self._offset

  def __str__(self):
    return str((self.type_name, self.string, self.offset))
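
# The offset is relative to the previous token: (0, gap) when a token starts
# on the same row, or (row delta, start column) when it starts on a new row
# (see Tokenize below). An illustrative sketch with assumed values:
#
#   t = OffsetToken(token.NAME, 'x', (0, 2))
#   print t  # prints: ('NAME', 'x', (0, 2))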


def Tokenize(f):
  """Read tokens from a file-like object.

  Args:
    f: Any object that has a readline method.

  Returns:
    A collections.deque containing OffsetTokens. Deques are cheaper and easier
    to manipulate sequentially than lists.
  """
  f.seek(0)
  tokenize_tokens = tokenize.generate_tokens(f.readline)

  offset_tokens = collections.deque()
  for prev_token, next_token in _Pairwise(tokenize_tokens):
    token_type, string, (srow, scol), _, _ = next_token
    if not prev_token:
      # The first token has no predecessor to be relative to.
      offset_tokens.append(OffsetToken(token_type, string, (0, 0)))
    else:
      erow, ecol = prev_token[3]
      if erow == srow:
        # Same row: store the column gap from the previous token's end.
        offset_tokens.append(OffsetToken(token_type, string, (0, scol-ecol)))
      else:
        # New row: store the row delta and the absolute start column.
        offset_tokens.append(OffsetToken(token_type, string, (srow-erow, scol)))

  return offset_tokens
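
# Illustrative usage (a sketch, not executed at import time): any seekable
# object with a readline method works, e.g. a StringIO buffer.
#
#   import StringIO
#   tokens = Tokenize(StringIO.StringIO('x = 1\n'))
#   print tokens[0]  # prints: ('NAME', 'x', (0, 0))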


def Untokenize(offset_tokens):
  """Return the string representation of an iterable of OffsetTokens."""
  # Make a copy. Don't modify the original.
  offset_tokens = collections.deque(offset_tokens)

  # Strip leading NL tokens.
  while offset_tokens[0].type == tokenize.NL:
    offset_tokens.popleft()

  # Strip leading vertical whitespace.
  first_token = offset_tokens.popleft()
  # Take care not to modify the existing token. Create a new one in its place.
  first_token = OffsetToken(first_token.type, first_token.string,
                            (0, first_token.offset[1]))
  offset_tokens.appendleft(first_token)

  # Convert OffsetTokens to tokenize tokens.
  tokenize_tokens = []
  row = 1
  col = 0
  for t in offset_tokens:
    offset_row, offset_col = t.offset
    if offset_row == 0:
      col += offset_col
    else:
      row += offset_row
      col = offset_col
    tokenize_tokens.append((t.type, t.string, (row, col), (row, col), None))

  # tokenize can't handle whitespace before line continuations,
  # so add a space.
  return tokenize.untokenize(tokenize_tokens).replace('\\\n', ' \\\n')
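
# A minimal round-trip sketch (assumed StringIO input, not executed at import
# time): Tokenize followed by Untokenize reproduces the source, modulo the
# leading-whitespace stripping above.
#
#   import StringIO
#   source = 'def f(a, b):\n  return a + b\n'
#   assert Untokenize(Tokenize(StringIO.StringIO(source))) == source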