1 # -*- coding: iso-8859-1 -*-
2 """A lexical analyzer class for simple shell-like syntaxes."""
# Module and documentation by Eric S. Raymond, 21 Dec 1998
# Input stacking and error message cleanup added by ESR, March 2000
# push_source() and pop_source() made explicit by ESR, January 2001.
# POSIX compliance, split(), string arguments, and
# iterator interface by Gustavo Niemeyer, April 2003.
14 from cStringIO
import StringIO
16 from StringIO
import StringIO
18 __all__
= ["shlex", "split"]
21 "A lexical analyzer class for simple shell-like syntaxes."
22 def __init__(self
, instream
=None, infile
=None, posix
=False):
23 if isinstance(instream
, basestring
):
24 instream
= StringIO(instream
)
25 if instream
is not None:
26 self
.instream
= instream
29 self
.instream
= sys
.stdin
37 self
.wordchars
= ('abcdfeghijklmnopqrstuvwxyz'
38 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')
40 self
.wordchars
+= ('ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
41 'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ')
42 self
.whitespace
= ' \t\r\n'
43 self
.whitespace_split
= False
46 self
.escapedquotes
= '"'
55 print 'shlex: reading from %s, line %d' \
56 % (self
.instream
, self
.lineno
)
58 def push_token(self
, tok
):
59 "Push a token onto the stack popped by the get_token method"
61 print "shlex: pushing token " + `tok`
62 self
.pushback
.insert(0, tok
)
64 def push_source(self
, newstream
, newfile
=None):
65 "Push an input source onto the lexer's input source stack."
66 if isinstance(newstream
, basestring
):
67 newstream
= StringIO(newstream
)
68 self
.filestack
.insert(0, (self
.infile
, self
.instream
, self
.lineno
))
70 self
.instream
= newstream
73 if newfile
is not None:
74 print 'shlex: pushing to file %s' % (self
.infile
,)
76 print 'shlex: pushing to stream %s' % (self
.instream
,)
79 "Pop the input source stack."
81 (self
.infile
, self
.instream
, self
.lineno
) = self
.filestack
[0]
82 self
.filestack
= self
.filestack
[1:]
84 print 'shlex: popping to %s, line %d' \
85 % (self
.instream
, self
.lineno
)
89 "Get a token from the input stream (or from stack if it's nonempty)"
91 tok
= self
.pushback
.pop(0)
93 print "shlex: popping token " + `tok`
95 # No pushback. Get a token.
96 raw
= self
.read_token()
98 if self
.source
is not None:
99 while raw
== self
.source
:
100 spec
= self
.sourcehook(self
.read_token())
102 (newfile
, newstream
) = spec
103 self
.push_source(newstream
, newfile
)
104 raw
= self
.get_token()
105 # Maybe we got EOF instead?
106 while raw
== self
.eof
:
107 if not self
.filestack
:
111 raw
= self
.get_token()
112 # Neither inclusion nor EOF
115 print "shlex: token=" + `raw`
117 print "shlex: token=EOF"
120 def read_token(self
):
124 nextchar
= self
.instream
.read(1)
126 self
.lineno
= self
.lineno
+ 1
128 print "shlex: in state", repr(self
.state
), \
129 "I see character:", repr(nextchar
)
130 if self
.state
is None:
131 self
.token
= '' # past end of file
133 elif self
.state
== ' ':
135 self
.state
= None # end of file
137 elif nextchar
in self
.whitespace
:
139 print "shlex: I see whitespace in whitespace state"
140 if self
.token
or (self
.posix
and quoted
):
141 break # emit current token
144 elif nextchar
in self
.commenters
:
145 self
.instream
.readline()
146 self
.lineno
= self
.lineno
+ 1
147 elif self
.posix
and nextchar
in self
.escape
:
149 self
.state
= nextchar
150 elif nextchar
in self
.wordchars
:
151 self
.token
= nextchar
153 elif nextchar
in self
.quotes
:
155 self
.token
= nextchar
156 self
.state
= nextchar
157 elif self
.whitespace_split
:
158 self
.token
= nextchar
161 self
.token
= nextchar
162 if self
.token
or (self
.posix
and quoted
):
163 break # emit current token
166 elif self
.state
in self
.quotes
:
168 if not nextchar
: # end of file
170 print "shlex: I see EOF in quotes state"
171 # XXX what error should be raised here?
172 raise ValueError, "No closing quotation"
173 if nextchar
== self
.state
:
175 self
.token
= self
.token
+ nextchar
180 elif self
.posix
and nextchar
in self
.escape
and \
181 self
.state
in self
.escapedquotes
:
182 escapedstate
= self
.state
183 self
.state
= nextchar
185 self
.token
= self
.token
+ nextchar
186 elif self
.state
in self
.escape
:
187 if not nextchar
: # end of file
189 print "shlex: I see EOF in escape state"
190 # XXX what error should be raised here?
191 raise ValueError, "No escaped character"
192 # In posix shells, only the quote itself or the escape
193 # character may be escaped within quotes.
194 if escapedstate
in self
.quotes
and \
195 nextchar
!= self
.state
and nextchar
!= escapedstate
:
196 self
.token
= self
.token
+ self
.state
197 self
.token
= self
.token
+ nextchar
198 self
.state
= escapedstate
199 elif self
.state
== 'a':
201 self
.state
= None # end of file
203 elif nextchar
in self
.whitespace
:
205 print "shlex: I see whitespace in word state"
207 if self
.token
or (self
.posix
and quoted
):
208 break # emit current token
211 elif nextchar
in self
.commenters
:
212 self
.instream
.readline()
213 self
.lineno
= self
.lineno
+ 1
216 if self
.token
or (self
.posix
and quoted
):
217 break # emit current token
220 elif self
.posix
and nextchar
in self
.quotes
:
221 self
.state
= nextchar
222 elif self
.posix
and nextchar
in self
.escape
:
224 self
.state
= nextchar
225 elif nextchar
in self
.wordchars
or nextchar
in self
.quotes \
226 or self
.whitespace_split
:
227 self
.token
= self
.token
+ nextchar
229 self
.pushback
.insert(0, nextchar
)
231 print "shlex: I see punctuation in word state"
234 break # emit current token
239 if self
.posix
and not quoted
and result
== '':
243 print "shlex: raw token=" + `result`
245 print "shlex: raw token=EOF"
def sourcehook(self, newfile):
    """Hook called on a filename to be sourced.

    If *newfile* starts with a double quote, its first and last
    characters are stripped.  When ``self.infile`` is a string and the
    resulting name is not absolute, the name is resolved relative to the
    directory of ``self.infile``.  The file is then opened for reading.

    Returns a ``(filename, open_file)`` tuple.  Errors raised by
    ``open`` (for example when the file does not exist) propagate to
    the caller.
    """
    # Slice rather than index: an empty name must not raise IndexError
    # here; it falls through to open(), which reports the real problem.
    if newfile[:1] == '"':
        newfile = newfile[1:-1]
    # This implements cpp-like semantics for relative-path inclusion.
    if isinstance(self.infile, basestring) and not os.path.isabs(newfile):
        newfile = os.path.join(os.path.dirname(self.infile), newfile)
    return (newfile, open(newfile, "r"))
257 def error_leader(self
, infile
=None, lineno
=None):
258 "Emit a C-compiler-like, Emacs-friendly error-message leader."
263 return "\"%s\", line %d: " % (infile
, lineno
)
269 token
= self
.get_token()
270 if token
== self
.eof
:
274 def split(s
, comments
=False):
275 lex
= shlex(s
, posix
=True)
276 lex
.whitespace_split
= True
281 if __name__
== '__main__':
282 if len(sys
.argv
) == 1:
286 lexer
= shlex(open(file), file)
288 tt
= lexer
.get_token()
290 print "Token: " + repr(tt
)