tools/telemetry/catapult_base/refactor/offset_token.py
# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import collections
import itertools
import token
import tokenize


def _Pairwise(iterable):
  """s -> (None, s0), (s0, s1), (s1, s2), (s2, s3), ..."""
  a, b = itertools.tee(iterable)
  a = itertools.chain((None,), a)
  return itertools.izip(a, b)
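
# Illustrative sketch of _Pairwise (not executed here): each element is
# paired with its predecessor, and the first element is paired with None.
#
#   list(_Pairwise('abc')) == [(None, 'a'), ('a', 'b'), ('b', 'c')]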


class OffsetToken(object):
  """A Python token with a relative position.

  A token is represented by a type defined in Python's token module, a string
  representing the content, and an offset. Using relative positions makes it
  easy to insert and remove tokens.
  """

  def __init__(self, token_type, string, offset):
    self._type = token_type
    self._string = string
    self._offset = offset

  @property
  def type(self):
    return self._type

  @property
  def type_name(self):
    return token.tok_name[self._type]

  @property
  def string(self):
    return self._string

  @string.setter
  def string(self, value):
    self._string = value

  @property
  def offset(self):
    return self._offset

  def __str__(self):
    return str((self.type_name, self.string, self.offset))
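
# The offset is relative to the previous token: (0, gap) when a token starts
# on the same row, or (row delta, start column) when it starts on a new row
# (see Tokenize below). An illustrative sketch with assumed values:
#
#   t = OffsetToken(token.NAME, 'x', (0, 2))
#   print t  # prints: ('NAME', 'x', (0, 2))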


def Tokenize(f):
  """Read tokens from a file-like object.

  Args:
    f: Any object that has a readline method.

  Returns:
    A collections.deque containing OffsetTokens. Deques are cheaper and easier
    to manipulate sequentially than lists.
  """
  f.seek(0)
  tokenize_tokens = tokenize.generate_tokens(f.readline)

  offset_tokens = collections.deque()
  for prev_token, next_token in _Pairwise(tokenize_tokens):
    token_type, string, (srow, scol), _, _ = next_token
    if not prev_token:
      # The first token has no predecessor to be relative to.
      offset_tokens.append(OffsetToken(token_type, string, (0, 0)))
    else:
      erow, ecol = prev_token[3]
      if erow == srow:
        # Same row: store the column gap from the previous token's end.
        offset_tokens.append(OffsetToken(token_type, string, (0, scol-ecol)))
      else:
        # New row: store the row delta and the absolute start column.
        offset_tokens.append(OffsetToken(token_type, string, (srow-erow, scol)))

  return offset_tokens
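
# Illustrative usage (a sketch, not executed at import time): any seekable
# object with a readline method works, e.g. a StringIO buffer.
#
#   import StringIO
#   tokens = Tokenize(StringIO.StringIO('x = 1\n'))
#   print tokens[0]  # prints: ('NAME', 'x', (0, 0))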


def Untokenize(offset_tokens):
  """Return the string representation of an iterable of OffsetTokens."""
  # Make a copy. Don't modify the original.
  offset_tokens = collections.deque(offset_tokens)

  # Strip leading NL tokens.
  while offset_tokens[0].type == tokenize.NL:
    offset_tokens.popleft()

  # Strip leading vertical whitespace.
  first_token = offset_tokens.popleft()
  # Take care not to modify the existing token. Create a new one in its place.
  first_token = OffsetToken(first_token.type, first_token.string,
                            (0, first_token.offset[1]))
  offset_tokens.appendleft(first_token)

  # Convert OffsetTokens to tokenize tokens.
  tokenize_tokens = []
  row = 1
  col = 0
  for t in offset_tokens:
    offset_row, offset_col = t.offset
    if offset_row == 0:
      col += offset_col
    else:
      row += offset_row
      col = offset_col
    tokenize_tokens.append((t.type, t.string, (row, col), (row, col), None))

  # tokenize can't handle whitespace before line continuations,
  # so add a space.
  return tokenize.untokenize(tokenize_tokens).replace('\\\n', ' \\\n')
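
# A minimal round-trip sketch (assumed StringIO input, not executed at import
# time): Tokenize followed by Untokenize reproduces the source, modulo the
# leading-whitespace stripping above.
#
#   import StringIO
#   source = 'def f(a, b):\n  return a + b\n'
#   assert Untokenize(Tokenize(StringIO.StringIO(source))) == source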