3 provides the TextFile class, which gives an interface to text files
4 that (optionally) takes care of stripping comments, ignoring blank
5 lines, and joining lines with backslashes."""
7 # created 1999/01/12, Greg Ward
12 import sys
, os
, string
17 """Provides a file-like object that takes care of all the things you
18 commonly want to do when processing a text file that has some
19 line-by-line syntax: strip comments (as long as "#" is your
20 comment character), skip blank lines, join adjacent lines by
21 escaping the newline (ie. backslash at end of line), strip
22 leading and/or trailing whitespace. All of these are optional
23 and independently controllable.
25 Provides a 'warn()' method so you can generate warning messages that
26 report physical line number, even if the logical line in question
27 spans multiple physical lines. Also provides 'unreadline()' for
28 implementing line-at-a-time lookahead.
30 Constructor is called as:
32 TextFile (filename=None, file=None, **options)
34 It bombs (RuntimeError) if both 'filename' and 'file' are None;
35 'filename' should be a string, and 'file' a file object (or
36 something that provides 'readline()' and 'close()' methods). It is
37 recommended that you supply at least 'filename', so that TextFile
38 can include it in warning messages. If 'file' is not supplied,
39 TextFile creates its own using the 'open()' builtin.
41 The options are all boolean, and affect the value returned by 'readline()':
43 strip_comments [default: true]
44 strip from "#" to end-of-line, as well as any whitespace
45 leading up to the "#" -- unless it is escaped by a backslash
46 lstrip_ws [default: false]
47 strip leading whitespace from each line before returning it
48 rstrip_ws [default: true]
49 strip trailing whitespace (including line terminator!) from
50 each line before returning it
51 skip_blanks [default: true]
52 skip lines that are empty *after* stripping comments and
53 whitespace. (If both lstrip_ws and rstrip_ws are false,
54 then some lines may consist of solely whitespace: these will
55 *not* be skipped, even if 'skip_blanks' is true.)
56 join_lines [default: false]
57 if a backslash is the last non-newline character on a line
58 after stripping comments and whitespace, join the following line
59 to it to form one "logical line"; if N consecutive lines end
60 with a backslash, then N+1 physical lines will be joined to
61 form one logical line.
62 collapse_join [default: false]
63 strip leading whitespace from lines that are joined to their
64 predecessor; only matters if (join_lines and not lstrip_ws)
66 Note that since 'rstrip_ws' can strip the trailing newline, the
67 semantics of 'readline()' must differ from those of the builtin file
68 object's 'readline()' method! In particular, 'readline()' returns
69 None for end-of-file: an empty string might just be a blank line (or
70 an all-whitespace line), if 'rstrip_ws' is true but 'skip_blanks' is not.
73 default_options
= { 'strip_comments': 1,
81 def __init__ (self
, filename
=None, file=None, **options
):
82 """Construct a new TextFile object. At least one of 'filename'
83 (a string) and 'file' (a file-like object) must be supplied.
84 They keyword argument options are described above and affect
85 the values returned by 'readline()'."""
87 if filename
is None and file is None:
89 "you must supply either or both of 'filename' and 'file'"
91 # set values for all options -- either from client option hash
92 # or fallback to default_options
93 for opt
in self
.default_options
.keys():
94 if options
.has_key (opt
):
95 setattr (self
, opt
, options
[opt
])
98 setattr (self
, opt
, self
.default_options
[opt
])
100 # sanity check client option hash
101 for opt
in options
.keys():
102 if not self
.default_options
.has_key (opt
):
103 raise KeyError, "invalid TextFile option '%s'" % opt
108 self
.filename
= filename
110 self
.current_line
= 0 # assuming that file is at BOF!
112 # 'linebuf' is a stack of lines that will be emptied before we
113 # actually read from the file; it's only populated by an
114 # 'unreadline()' operation
118 def open (self
, filename
):
119 """Open a new file named 'filename'. This overrides both the
120 'filename' and 'file' arguments to the constructor."""
122 self
.filename
= filename
123 self
.file = open (self
.filename
, 'r')
124 self
.current_line
= 0
128 """Close the current file and forget everything we know about it
129 (filename, current line number)."""
134 self
.current_line
= None
137 def gen_error (self
, msg
, line
=None):
140 line
= self
.current_line
141 outmsg
.append(self
.filename
+ ", ")
142 if type (line
) in (ListType
, TupleType
):
143 outmsg
.append("lines %d-%d: " % tuple (line
))
145 outmsg
.append("line %d: " % line
)
146 outmsg
.append(str(msg
))
147 return string
.join(outmsg
, "")
150 def error (self
, msg
, line
=None):
151 raise ValueError, "error: " + self
.gen_error(msg
, line
)
153 def warn (self
, msg
, line
=None):
154 """Print (to stderr) a warning message tied to the current logical
155 line in the current file. If the current logical line in the
156 file spans multiple physical lines, the warning refers to the
157 whole range, eg. "lines 3-5". If 'line' supplied, it overrides
158 the current line number; it may be a list or tuple to indicate a
159 range of physical lines, or an integer for a single physical
161 sys
.stderr
.write("warning: " + self
.gen_error(msg
, line
) + "\n")
165 """Read and return a single logical line from the current file (or
166 from an internal buffer if lines have previously been "unread"
167 with 'unreadline()'). If the 'join_lines' option is true, this
168 may involve reading multiple physical lines concatenated into a
169 single string. Updates the current line number, so calling
170 'warn()' after 'readline()' emits a warning about the physical
171 line(s) just read. Returns None on end-of-file, since the empty
172 string can occur if 'rstrip_ws' is true but 'skip_blanks' is not.
175 # If any "unread" lines waiting in 'linebuf', return the top
176 # one. (We don't actually buffer read-ahead data -- lines only
177 # get put in 'linebuf' if the client explicitly does an
180 line
= self
.linebuf
[-1]
187 # read the line, make it None if EOF
188 line
= self
.file.readline()
189 if line
== '': line
= None
191 if self
.strip_comments
and line
:
193 # Look for the first "#" in the line. If none, never
194 # mind. If we find one and it's the first character, or
195 # is not preceded by "\", then it starts a comment --
196 # strip the comment, strip whitespace before it, and
197 # carry on. Otherwise, it's just an escaped "#", so
198 # unescape it (and any other escaped "#"'s that might be
199 # lurking in there) and otherwise leave the line alone.
201 pos
= string
.find (line
, "#")
202 if pos
== -1: # no "#" -- no comments
205 # It's definitely a comment -- either "#" is the first
206 # character, or it's elsewhere and unescaped.
207 elif pos
== 0 or line
[pos
-1] != "\\":
208 # Have to preserve the trailing newline, because it's
209 # the job of a later step (rstrip_ws) to remove it --
210 # and if rstrip_ws is false, we'd better preserve it!
211 # (NB. this means that if the final line is all comment
212 # and has no trailing newline, we will think that it's
213 # EOF; I think that's OK.)
214 eol
= (line
[-1] == '\n') and '\n' or ''
215 line
= line
[0:pos
] + eol
217 # If all that's left is whitespace, then skip line
218 # *now*, before we try to join it to 'buildup_line' --
219 # that way constructs like
221 # # comment that should be ignored
223 # result in "hello there".
224 if string
.strip(line
) == "":
227 else: # it's an escaped "#"
228 line
= string
.replace (line
, "\\#", "#")
231 # did previous line end with a backslash? then accumulate
232 if self
.join_lines
and buildup_line
:
235 self
.warn ("continuation line immediately precedes "
239 if self
.collapse_join
:
240 line
= string
.lstrip (line
)
241 line
= buildup_line
+ line
243 # careful: pay attention to line number when incrementing it
244 if type (self
.current_line
) is ListType
:
245 self
.current_line
[1] = self
.current_line
[1] + 1
247 self
.current_line
= [self
.current_line
,
249 # just an ordinary line, read it as usual
251 if line
is None: # eof
254 # still have to be careful about incrementing the line number!
255 if type (self
.current_line
) is ListType
:
256 self
.current_line
= self
.current_line
[1] + 1
258 self
.current_line
= self
.current_line
+ 1
261 # strip whitespace however the client wants (leading and
262 # trailing, or one or the other, or neither)
263 if self
.lstrip_ws
and self
.rstrip_ws
:
264 line
= string
.strip (line
)
266 line
= string
.lstrip (line
)
268 line
= string
.rstrip (line
)
270 # blank line (whether we rstrip'ed or not)? skip to next line
272 if (line
== '' or line
== '\n') and self
.skip_blanks
:
277 buildup_line
= line
[:-1]
280 if line
[-2:] == '\\\n':
281 buildup_line
= line
[0:-2] + '\n'
284 # well, I guess there's some actual content there: return it
290 def readlines (self
):
291 """Read and return the list of all logical lines remaining in the
296 line
= self
.readline()
302 def unreadline (self
, line
):
303 """Push 'line' (a string) onto an internal buffer that will be
304 checked by future 'readline()' calls. Handy for implementing
305 a parser with line-at-a-time lookahead."""
307 self
.linebuf
.append (line
)
310 if __name__
== "__main__":
311 test_data
= """# test file
314 # intervening comment
315 continues on next line
317 # result 1: no fancy options
318 result1
= map (lambda x
: x
+ "\n", string
.split (test_data
, "\n")[0:-1])
320 # result 2: just strip comments
323 " continues on next line\n"]
325 # result 3: just strip blank lines
326 result3
= ["# test file\n",
328 "# intervening comment\n",
329 " continues on next line\n"]
331 # result 4: default, strip comments, blank lines, and trailing whitespace
332 result4
= ["line 3 \\",
333 " continues on next line"]
335 # result 5: strip comments and blanks, plus join lines (but don't
336 # "collapse" joined lines)
337 result5
= ["line 3 continues on next line"]
339 # result 6: strip comments and blanks, plus join lines (and
340 # "collapse" joined lines)
341 result6
= ["line 3 continues on next line"]
343 def test_input (count
, description
, file, expected_result
):
344 result
= file.readlines ()
345 # result = string.join (result, '')
346 if result
== expected_result
:
347 print "ok %d (%s)" % (count
, description
)
349 print "not ok %d (%s):" % (count
, description
)
351 print expected_result
356 filename
= "test.txt"
357 out_file
= open (filename
, "w")
358 out_file
.write (test_data
)
361 in_file
= TextFile (filename
, strip_comments
=0, skip_blanks
=0,
362 lstrip_ws
=0, rstrip_ws
=0)
363 test_input (1, "no processing", in_file
, result1
)
365 in_file
= TextFile (filename
, strip_comments
=1, skip_blanks
=0,
366 lstrip_ws
=0, rstrip_ws
=0)
367 test_input (2, "strip comments", in_file
, result2
)
369 in_file
= TextFile (filename
, strip_comments
=0, skip_blanks
=1,
370 lstrip_ws
=0, rstrip_ws
=0)
371 test_input (3, "strip blanks", in_file
, result3
)
373 in_file
= TextFile (filename
)
374 test_input (4, "default processing", in_file
, result4
)
376 in_file
= TextFile (filename
, strip_comments
=1, skip_blanks
=1,
377 join_lines
=1, rstrip_ws
=1)
378 test_input (5, "join lines without collapsing", in_file
, result5
)
380 in_file
= TextFile (filename
, strip_comments
=1, skip_blanks
=1,
381 join_lines
=1, rstrip_ws
=1, collapse_join
=1)
382 test_input (6, "join lines with collapsing", in_file
, result6
)