1 # Module and documentation by Eric S. Raymond, 21 Dec 1998
6 "A lexical analyzer class for simple shell-like syntaxes."
7 def __init__(self
, instream
=None):
9 self
.instream
= instream
11 self
.instream
= sys
.stdin
13 self
.wordchars
= 'abcdfeghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_'
14 self
.whitespace
= ' \t\r\n'
22 def push_token(self
, tok
):
23 "Push a token onto the stack popped by the get_token method"
25 print "Pushing " + tok
26 self
.pushback
= [tok
] + self
.pushback
;
29 "Get a token from the input stream (or from stack if it's nonempty)"
31 tok
= self
.pushback
[0]
32 self
.pushback
= self
.pushback
[1:]
34 print "Popping " + tok
38 nextchar
= self
.instream
.read(1);
40 self
.lineno
= self
.lineno
+ 1
42 print "In state " + repr(self
.state
) + " I see character: " + repr(nextchar
)
43 if self
.state
== None:
45 elif self
.state
== ' ':
47 self
.state
= None; # end of file
49 elif nextchar
in self
.whitespace
:
51 print "I see whitespace in whitespace state"
53 break # emit current token
56 elif nextchar
in self
.commenters
:
57 self
.instream
.readline()
58 self
.lineno
= self
.lineno
+ 1
59 elif nextchar
in self
.wordchars
:
62 elif nextchar
in self
.quotes
:
68 break # emit current token
71 elif self
.state
in self
.quotes
:
72 self
.token
= self
.token
+ nextchar
73 if nextchar
== self
.state
:
76 elif self
.state
== 'a':
78 self
.state
= None; # end of file
80 elif nextchar
in self
.whitespace
:
82 print "I see whitespace in word state"
85 break # emit current token
88 elif nextchar
in self
.commenters
:
89 self
.instream
.readline()
90 self
.lineno
= self
.lineno
+ 1
91 elif nextchar
in self
.wordchars
or nextchar
in self
.quotes
:
92 self
.token
= self
.token
+ nextchar
94 self
.pushback
= [nextchar
] + self
.pushback
96 print "I see punctuation in word state"
99 break # emit current token
106 print "Token: " + result
109 if __name__
== '__main__':
113 tt
= lexer
.get_token()
115 print "Token: " + repr(tt
)