"""A lexical analyzer class for simple shell-like syntaxes."""
# Module and documentation by Eric S. Raymond, 21 Dec 1998
# Input stacking and error message cleanup added by ESR, March 2000
# push_source() and pop_source() made explicit by ESR, January 2001.
13 "A lexical analyzer class for simple shell-like syntaxes."
14 def __init__(self
, instream
=None, infile
=None):
15 if instream
is not None:
16 self
.instream
= instream
19 self
.instream
= sys
.stdin
22 self
.wordchars
= ('abcdfeghijklmnopqrstuvwxyz'
23 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')
24 self
.whitespace
= ' \t\r\n'
34 print 'shlex: reading from %s, line %d' \
35 % (self
.instream
, self
.lineno
)
37 def push_token(self
, tok
):
38 "Push a token onto the stack popped by the get_token method"
40 print "shlex: pushing token " + `tok`
41 self
.pushback
= [tok
] + self
.pushback
43 def push_source(self
, newstream
, newfile
=None):
44 "Push an input source onto the lexer's input source stack."
45 self
.filestack
.insert(0, (self
.infile
, self
.instream
, self
.lineno
))
47 self
.instream
= newstream
50 if newfile
is not None:
51 print 'shlex: pushing to file %s' % (self
.infile
,)
53 print 'shlex: pushing to stream %s' % (self
.instream
,)
56 "Pop the input source stack."
58 (self
.infile
, self
.instream
, self
.lineno
) = self
.filestack
[0]
59 self
.filestack
= self
.filestack
[1:]
61 print 'shlex: popping to %s, line %d' \
62 % (self
.instream
, self
.lineno
)
66 "Get a token from the input stream (or from stack if it's nonempty)"
68 tok
= self
.pushback
[0]
69 self
.pushback
= self
.pushback
[1:]
71 print "shlex: popping token " + `tok`
73 # No pushback. Get a token.
74 raw
= self
.read_token()
76 while raw
== self
.source
:
77 spec
= self
.sourcehook(self
.read_token())
79 (newfile
, newstream
) = spec
80 self
.push_source(newstream
, newfile
)
81 raw
= self
.get_token()
82 # Maybe we got EOF instead?
84 if len(self
.filestack
) == 0:
88 raw
= self
.get_token()
89 # Neither inclusion nor EOF
92 print "shlex: token=" + `raw`
94 print "shlex: token=EOF"
98 "Read a token from the input stream (no pushback or inclusions)"
100 nextchar
= self
.instream
.read(1)
102 self
.lineno
= self
.lineno
+ 1
104 print "shlex: in state", repr(self
.state
), \
105 "I see character:", repr(nextchar
)
106 if self
.state
is None:
107 self
.token
= '' # past end of file
109 elif self
.state
== ' ':
111 self
.state
= None # end of file
113 elif nextchar
in self
.whitespace
:
115 print "shlex: I see whitespace in whitespace state"
117 break # emit current token
120 elif nextchar
in self
.commenters
:
121 self
.instream
.readline()
122 self
.lineno
= self
.lineno
+ 1
123 elif nextchar
in self
.wordchars
:
124 self
.token
= nextchar
126 elif nextchar
in self
.quotes
:
127 self
.token
= nextchar
128 self
.state
= nextchar
130 self
.token
= nextchar
132 break # emit current token
135 elif self
.state
in self
.quotes
:
136 self
.token
= self
.token
+ nextchar
137 if nextchar
== self
.state
:
140 elif not nextchar
: # end of file
142 print "shlex: I see EOF in quotes state"
143 # XXX what error should be raised here?
144 raise ValueError, "No closing quotation"
145 elif self
.state
== 'a':
147 self
.state
= None # end of file
149 elif nextchar
in self
.whitespace
:
151 print "shlex: I see whitespace in word state"
154 break # emit current token
157 elif nextchar
in self
.commenters
:
158 self
.instream
.readline()
159 self
.lineno
= self
.lineno
+ 1
160 elif nextchar
in self
.wordchars
or nextchar
in self
.quotes
:
161 self
.token
= self
.token
+ nextchar
163 self
.pushback
= [nextchar
] + self
.pushback
165 print "shlex: I see punctuation in word state"
168 break # emit current token
175 print "shlex: raw token=" + `result`
177 print "shlex: raw token=EOF"
def sourcehook(self, newfile):
    """Hook called on a filename to be sourced.

    newfile is the raw token naming the file to include.  If it starts
    with a double quote, the first and last characters are stripped.

    When the current input file name (self.infile) is a string and
    newfile is not absolute, newfile is resolved relative to the
    directory of self.infile (cpp-like inclusion semantics).

    Returns a (filename, open_stream) tuple; the stream is opened for
    reading and the caller is responsible for closing it.  Any error
    from open() (e.g. a missing file or an empty name) propagates.
    """
    # Slice instead of indexing so that an empty token (e.g. EOF right
    # after the source keyword) reaches open() and fails like any other
    # bad filename, rather than dying here with IndexError.
    if newfile[:1] == '"':
        newfile = newfile[1:-1]
    # This implements cpp-like semantics for relative-path inclusion.
    # isinstance() also accepts str subclasses, unlike an exact type match.
    if isinstance(self.infile, str) and not os.path.isabs(newfile):
        newfile = os.path.join(os.path.dirname(self.infile), newfile)
    return (newfile, open(newfile, "r"))
189 def error_leader(self
, infile
=None, lineno
=None):
190 "Emit a C-compiler-like, Emacs-friendly error-message leader."
195 return "\"%s\", line %d: " % (infile
, lineno
)
198 if __name__
== '__main__':
199 if len(sys
.argv
) == 1:
203 lexer
= shlex(open(file), file)
205 tt
= lexer
.get_token()
207 print "Token: " + repr(tt
)