1 """A lexical analyzer class for simple shell-like syntaxes."""
3 # Module and documentation by Eric S. Raymond, 21 Dec 1998
4 # Input stacking and error message cleanup added by ESR, March 2000
11 "A lexical analyzer class for simple shell-like syntaxes."
12 def __init__(self
, instream
=None, infile
=None):
14 self
.instream
= instream
17 self
.instream
= sys
.stdin
20 self
.wordchars
= ('abcdfeghijklmnopqrstuvwxyz'
21 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_')
22 self
.whitespace
= ' \t\r\n'
32 print 'shlex: reading from %s, line %d' \
33 % (self
.instream
, self
.lineno
)
35 def push_token(self
, tok
):
36 "Push a token onto the stack popped by the get_token method"
38 print "shlex: pushing token " + `tok`
39 self
.pushback
= [tok
] + self
.pushback
;
42 "Get a token from the input stream (or from stack if it's nonempty)"
44 tok
= self
.pushback
[0]
45 self
.pushback
= self
.pushback
[1:]
47 print "shlex: popping token " + `tok`
49 # No pushback. Get a token.
50 raw
= self
.read_token()
52 while raw
== self
.source
:
53 (newfile
, newstream
) = self
.sourcehook(self
.read_token())
54 self
.filestack
.insert(0, (self
.infile
, self
.instream
, self
.lineno
))
56 self
.instream
= newstream
59 print 'shlex: pushing to file %s' % (self
.infile
,)
60 raw
= self
.get_token()
61 # Maybe we got EOF instead?
63 if len(self
.filestack
) == 0:
67 (self
.infile
, self
.instream
, self
.lineno
) = self
.filestack
[0]
68 self
.filestack
= self
.filestack
[1:]
70 print 'shlex: popping to %s, line %d' \
71 % (self
.instream
, self
.lineno
)
73 raw
= self
.get_token()
74 # Neither inclusion nor EOF
77 print "shlex: token=" + `raw`
79 print "shlex: token=EOF"
83 "Read a token from the input stream (no pushback or inclusions)"
86 nextchar
= self
.instream
.read(1);
88 self
.lineno
= self
.lineno
+ 1
90 print "shlex: in state", repr(self
.state
), \
91 "I see character:", repr(nextchar
)
92 if self
.state
is None:
93 self
.token
= ''; # past end of file
95 elif self
.state
== ' ':
97 self
.state
= None; # end of file
99 elif nextchar
in self
.whitespace
:
101 print "shlex: I see whitespace in whitespace state"
103 break # emit current token
106 elif nextchar
in self
.commenters
:
107 self
.instream
.readline()
108 self
.lineno
= self
.lineno
+ 1
109 elif nextchar
in self
.wordchars
:
110 self
.token
= nextchar
112 elif nextchar
in self
.quotes
:
113 self
.token
= nextchar
114 self
.state
= nextchar
116 self
.token
= nextchar
118 break # emit current token
121 elif self
.state
in self
.quotes
:
122 self
.token
= self
.token
+ nextchar
123 if nextchar
== self
.state
:
126 elif self
.state
== 'a':
128 self
.state
= None; # end of file
130 elif nextchar
in self
.whitespace
:
132 print "shlex: I see whitespace in word state"
135 break # emit current token
138 elif nextchar
in self
.commenters
:
139 self
.instream
.readline()
140 self
.lineno
= self
.lineno
+ 1
141 elif nextchar
in self
.wordchars
or nextchar
in self
.quotes
:
142 self
.token
= self
.token
+ nextchar
144 self
.pushback
= [nextchar
] + self
.pushback
146 print "shlex: I see punctuation in word state"
149 break # emit current token
156 print "shlex: raw token=" + `result`
158 print "shlex: raw token=EOF"
def sourcehook(self, newfile):
    """Hook called on a filename to be sourced.

    Resolve the file name that follows a source-inclusion keyword and
    open it for reading.

    Parameters:
        newfile: the raw file-name token.  If it starts with a double
            quote, the first and last characters are stripped before the
            name is used.

    Returns:
        A ``(filename, stream)`` tuple: the resolved file name and the
        file object opened on it in text read mode.  The stream is not
        closed here; the caller takes ownership of it.

    Raises:
        IndexError: if ``newfile`` is empty.
        Whatever ``open`` raises if the resolved file cannot be opened.
    """
    # A quoted token still carries its delimiters; drop them.
    if newfile[0] == '"':
        newfile = newfile[1:-1]
    # This implements cpp-like semantics for relative-path inclusion:
    # a relative name is looked up next to the file currently being read.
    # isinstance (rather than an exact type comparison) lets str
    # subclasses used as ``infile`` take part in the resolution too.
    if isinstance(self.infile, str) and not os.path.isabs(newfile):
        newfile = os.path.join(os.path.dirname(self.infile), newfile)
    return (newfile, open(newfile, "r"))
170 def error_leader(self
, infile
=None, lineno
=None):
171 "Emit a C-compiler-like, Emacs-friendly error-message leader."
176 return "\"%s\", line %d: " % (infile
, lineno
)
179 if __name__
== '__main__':
180 if len(sys
.argv
) == 1:
184 lexer
= shlex(open(file), file)
186 tt
= lexer
.get_token()
188 print "Token: " + repr(tt
)