1 """A lexical analyzer class for simple shell-like syntaxes."""
3 # Module and documentation by Eric S. Raymond, 21 Dec 1998
4 # Input stacking and error message cleanup added by ESR, March 2000
5 # push_source() and pop_source() made explicit by ESR, January 2001.
13 "A lexical analyzer class for simple shell-like syntaxes."
14 def __init__(self
, instream
=None, infile
=None):
16 self
.instream
= instream
19 self
.instream
= sys
.stdin
22 self
.wordchars
= ('abcdfeghijklmnopqrstuvwxyz'
23 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')
24 self
.whitespace
= ' \t\r\n'
34 print 'shlex: reading from %s, line %d' \
35 % (self
.instream
, self
.lineno
)
37 def push_token(self
, tok
):
38 "Push a token onto the stack popped by the get_token method"
40 print "shlex: pushing token " + `tok`
41 self
.pushback
= [tok
] + self
.pushback
43 def push_source(self
, newstream
, newfile
=None):
44 "Push an input source onto the lexer's input source stack."
45 self
.filestack
.insert(0, (self
.infile
, self
.instream
, self
.lineno
))
47 self
.instream
= newstream
51 print 'shlex: pushing to file %s' % (self
.infile
,)
53 print 'shlex: pushing to stream %s' % (self
.instream
,)
56 "Pop the input source stack."
58 (self
.infile
, self
.instream
, self
.lineno
) = self
.filestack
[0]
59 self
.filestack
= self
.filestack
[1:]
61 print 'shlex: popping to %s, line %d' \
62 % (self
.instream
, self
.lineno
)
66 "Get a token from the input stream (or from stack if it's nonempty)"
68 tok
= self
.pushback
[0]
69 self
.pushback
= self
.pushback
[1:]
71 print "shlex: popping token " + `tok`
73 # No pushback. Get a token.
74 raw
= self
.read_token()
76 while raw
== self
.source
:
77 spec
= self
.sourcehook(self
.read_token())
79 (newfile
, newstream
) = spec
80 self
.push_source(newstream
, newfile
)
81 raw
= self
.get_token()
82 # Maybe we got EOF instead?
84 if len(self
.filestack
) == 0:
88 raw
= self
.get_token()
89 # Neither inclusion nor EOF
92 print "shlex: token=" + `raw`
94 print "shlex: token=EOF"
98 "Read a token from the input stream (no pushback or inclusions)"
101 nextchar
= self
.instream
.read(1)
103 self
.lineno
= self
.lineno
+ 1
105 print "shlex: in state", repr(self
.state
), \
106 "I see character:", repr(nextchar
)
107 if self
.state
is None:
108 self
.token
= '' # past end of file
110 elif self
.state
== ' ':
112 self
.state
= None # end of file
114 elif nextchar
in self
.whitespace
:
116 print "shlex: I see whitespace in whitespace state"
118 break # emit current token
121 elif nextchar
in self
.commenters
:
122 self
.instream
.readline()
123 self
.lineno
= self
.lineno
+ 1
124 elif nextchar
in self
.wordchars
:
125 self
.token
= nextchar
127 elif nextchar
in self
.quotes
:
128 self
.token
= nextchar
129 self
.state
= nextchar
131 self
.token
= nextchar
133 break # emit current token
136 elif self
.state
in self
.quotes
:
137 self
.token
= self
.token
+ nextchar
138 if nextchar
== self
.state
:
141 elif not nextchar
: # end of file
143 print "shlex: I see EOF in quotes state"
144 # XXX what error should be raised here?
145 raise ValueError, "No closing quotation"
146 elif self
.state
== 'a':
148 self
.state
= None # end of file
150 elif nextchar
in self
.whitespace
:
152 print "shlex: I see whitespace in word state"
155 break # emit current token
158 elif nextchar
in self
.commenters
:
159 self
.instream
.readline()
160 self
.lineno
= self
.lineno
+ 1
161 elif nextchar
in self
.wordchars
or nextchar
in self
.quotes
:
162 self
.token
= self
.token
+ nextchar
164 self
.pushback
= [nextchar
] + self
.pushback
166 print "shlex: I see punctuation in word state"
169 break # emit current token
176 print "shlex: raw token=" + `result`
178 print "shlex: raw token=EOF"
def sourcehook(self, newfile):
    """Hook called on a filename to be sourced.

    Turns the token that names an included file into an opened stream.

    newfile -- the filename token.  If it begins with a double quote,
               its first and last characters are dropped.

    Returns a (filename, stream) tuple, where stream is the file opened
    for reading.  The stream is not closed here; that is left to the
    caller.

    Whatever open() raises for an unreadable path propagates to the
    caller; an empty newfile raises IndexError from the quote check.
    """
    # Drop the enclosing quote characters from a quoted filename token.
    if newfile[0] == '"':
        newfile = newfile[1:-1]
    # This implements cpp-like semantics for relative-path inclusion:
    # a relative name is resolved against the directory of the file
    # currently being read.  isinstance() is used instead of an exact
    # type comparison so that str subclasses are handled as well.
    if isinstance(self.infile, str) and not os.path.isabs(newfile):
        newfile = os.path.join(os.path.dirname(self.infile), newfile)
    return (newfile, open(newfile, "r"))
190 def error_leader(self
, infile
=None, lineno
=None):
191 "Emit a C-compiler-like, Emacs-friendly error-message leader."
196 return "\"%s\", line %d: " % (infile
, lineno
)
199 if __name__
== '__main__':
200 if len(sys
.argv
) == 1:
204 lexer
= shlex(open(file), file)
206 tt
= lexer
.get_token()
208 print "Token: " + repr(tt
)