3 # Copyright (C) 1999-2014 The ViewCVS Group. All Rights Reserved.
5 # By using this file, you agree to the terms and conditions set forth in
6 # the LICENSE.html file which can be found at the top level of the ViewVC
7 # distribution or at http://viewvc.org/license-1.html.
9 # For more information, visit http://viewvc.org/
11 # -----------------------------------------------------------------------
13 # This file was originally based on portions of the blame.py script by
16 # -----------------------------------------------------------------------
22 token_term
= string
.whitespace
+ ";:"
24 token_term
= frozenset(token_term
)
28 # the algorithm is about the same speed for any CHUNK_SIZE chosen.
29 # grab a good-sized chunk, but not too large to overwhelm memory.
30 # note: we use a multiple of a standard block size
31 CHUNK_SIZE
= 192 * 512 # about 100k
33 # CHUNK_SIZE = 5 # for debugging, make the function grind...
35 def __init__(self
, file):
38 self
.buf
= self
.rcsfile
.read(self
.CHUNK_SIZE
)
40 raise RuntimeError, 'EOF'
43 "Get the next token from the RCS file."
45 # Note: we can afford to loop within Python, examining individual
46 # characters. For the whitespace and tokens, the number of iterations
47 # is typically quite small. Thus, a simple iterative loop will beat
48 # out more complex solutions.
56 buf
= self
.rcsfile
.read(self
.CHUNK_SIZE
)
58 # signal EOF by returning None as the token
59 del self
.buf
# so we fail if get() is called again
64 if buf
[idx
] not in string
.whitespace
:
78 # find token characters in the current buffer
79 while end
< lbuf
and buf
[end
] not in self
.token_term
:
81 token
= token
+ buf
[idx
:end
]
84 # we stopped before the end, so we have a full token
88 # we stopped at the end of the buffer, so we may have a partial token
89 buf
= self
.rcsfile
.read(self
.CHUNK_SIZE
)
91 # signal EOF by returning None as the token
92 del self
.buf
# so we fail if get() is called again
101 # a "string" which starts with the "@" character. we'll skip it when we
102 # search for content.
110 buf
= self
.rcsfile
.read(self
.CHUNK_SIZE
)
112 raise RuntimeError, 'EOF'
114 i
= string
.find(buf
, '@', idx
)
116 chunks
.append(buf
[idx
:])
120 chunks
.append(buf
[idx
:i
])
122 buf
= '@' + self
.rcsfile
.read(self
.CHUNK_SIZE
)
124 raise RuntimeError, 'EOF'
127 if buf
[i
+ 1] == '@':
128 chunks
.append(buf
[idx
:i
+1])
132 chunks
.append(buf
[idx
:i
])
137 return string
.join(chunks
, '')
145 def match(self
, match
):
146 "Try to match the next token from the input buffer."
150 raise common
.RCSExpected(token
, match
)
152 def unget(self
, token
):
153 "Put this token back, for the next get() to return."
155 # Override the class' .get method with a function which clears the
156 # overridden method then returns the pushed token. Since this function
157 # will not be looked up via the class mechanism, it should be a "normal"
158 # function, meaning it won't have "self" automatically inserted.
159 # Therefore, we need to pass both self and the token thru via defaults.
161 # note: we don't put this into the input buffer because it may have been
162 # @-unescaped already.
164 def give_it_back(self
=self
, token
=token
):
168 self
.get
= give_it_back
170 def mget(self
, count
):
171 "Return multiple tokens. 'next' is at the end."
173 for i
in range(count
):
174 result
.append(self
.get())
class Parser(common._Parser):
    """RCS parser bound to this module's `_TokenStream` tokenizer.

    All behavior is inherited from `common._Parser`; this subclass only
    sets the `stream_class` attribute.
    """

    # NOTE(review): presumably the base class instantiates `stream_class`
    # to tokenize its input -- confirm against common._Parser.
    stream_class = _TokenStream