Updated for 2.1b2 distribution.
[python/dscho.git] / Lib / fileinput.py
blob794d575375ba3c5142a9767b21acf3bf35d20866
1 """Helper class to quickly write a loop over all standard input files.
3 Typical use is:
5 import fileinput
6 for line in fileinput.input():
7 process(line)
9 This iterates over the lines of all files listed in sys.argv[1:],
10 defaulting to sys.stdin if the list is empty. If a filename is '-' it
11 is also replaced by sys.stdin. To specify an alternative list of
12 filenames, pass it as the argument to input(). A single file name is
13 also allowed.
15 Functions filename(), lineno() return the filename and cumulative line
16 number of the line that has just been read; filelineno() returns its
17 line number in the current file; isfirstline() returns true iff the
18 line just read is the first line of its file; isstdin() returns true
19 iff the line was read from sys.stdin. Function nextfile() closes the
20 current file so that the next iteration will read the first line from
21 the next file (if any); lines not read from the file will not count
22 towards the cumulative line count; the filename is not changed until
23 after the first line of the next file has been read. Function close()
24 closes the sequence.
26 Before any lines have been read, filename() returns None and both line
27 numbers are zero; nextfile() has no effect. After all lines have been
28 read, filename() and the line number functions return the values
29 pertaining to the last line read; nextfile() has no effect.
31 All files are opened in text mode. If an I/O error occurs during
32 opening or reading a file, the IOError exception is raised.
34 If sys.stdin is used more than once, the second and further use will
35 return no lines, except perhaps for interactive use, or if it has been
36 explicitly reset (e.g. using sys.stdin.seek(0)).
38 Empty files are opened and immediately closed; the only time their
39 presence in the list of filenames is noticeable at all is when the
40 last file opened is empty.
42 It is possible that the last line of a file doesn't end in a newline
43 character; otherwise lines are returned including the trailing
44 newline.
46 Class FileInput is the implementation; its methods filename(),
47 lineno(), filelineno(), isfirstline(), isstdin(), nextfile() and close()
48 correspond to the functions in the module. In addition it has a
49 readline() method which returns the next input line, and a
50 __getitem__() method which implements the sequence behavior. The
51 sequence must be accessed in strictly sequential order; sequence
52 access and readline() cannot be mixed.
54 Optional in-place filtering: if the keyword argument inplace=1 is
55 passed to input() or to the FileInput constructor, the file is moved
56 to a backup file and standard output is directed to the input file.
57 This makes it possible to write a filter that rewrites its input file
58 in place. If the keyword argument backup=".<some extension>" is also
59 given, it specifies the extension for the backup file, and the backup
60 file remains around; by default, the extension is ".bak" and it is
61 deleted when the output file is closed. In-place filtering is
62 disabled when standard input is read. XXX The current implementation
63 does not work for MS-DOS 8+3 filesystems.
65 Performance: this module is unfortunately one of the slower ways of
66 processing large numbers of input lines. Nevertheless, a significant
67 speed-up has been obtained by using readlines(bufsize) instead of
68 readline(). A new keyword argument, bufsize=N, is present on the
69 input() function and the FileInput() class to override the default
70 buffer size.
72 XXX Possible additions:
74 - optional getopt argument processing
75 - specify open mode ('r' or 'rb')
76 - fileno()
77 - isatty()
78 - read(), read(size), even readlines()
80 """
82 import sys, os, stat
# Public names exported by "from fileinput import *".
__all__ = ["input","close","nextfile","filename","lineno","filelineno",
           "isfirstline","isstdin","FileInput"]
# The FileInput instance installed by the most recent input() call, or None
# when there is none; the module-level functions delegate to it.
_state = None
# Buffer size used by FileInput when the caller passes a false bufsize.
DEFAULT_BUFSIZE = 8*1024
def input(files=None, inplace=0, backup="", bufsize=0):
    """Create the module-wide FileInput instance and return it.

    All arguments are handed unchanged to the FileInput constructor.  The
    new instance is stored in the module global _state so that the other
    module-level functions can delegate to it.

    Raises RuntimeError if an instance is already installed and still has
    a file open.
    """
    global _state
    if _state and _state._file:
        raise RuntimeError("input() already active")
    instance = FileInput(files, inplace, backup, bufsize)
    _state = instance
    return instance
def close():
    """Close the sequence: detach the module-wide instance and close it.

    Does nothing when no instance is installed.
    """
    global _state
    # Clear the global before closing so the module is already "inactive"
    # if close() on the instance raises.
    state, _state = _state, None
    if state:
        state.close()
def nextfile():
    """Close the current file so the next line read comes from the next file.

    Delegates to the module-wide instance; raises RuntimeError when
    input() has not installed one.
    """
    state = _state
    if not state:
        raise RuntimeError("no active input()")
    return state.nextfile()
def filename():
    """Return the name of the file the last line was read from.

    Delegates to the module-wide instance; raises RuntimeError when
    input() has not installed one.
    """
    state = _state
    if not state:
        raise RuntimeError("no active input()")
    return state.filename()
def lineno():
    """Return the cumulative line number of the line just read.

    Delegates to the module-wide instance; raises RuntimeError when
    input() has not installed one.
    """
    state = _state
    if not state:
        raise RuntimeError("no active input()")
    return state.lineno()
def filelineno():
    """Return the line number, within the current file, of the line just read.

    Delegates to the module-wide instance; raises RuntimeError when
    input() has not installed one.
    """
    state = _state
    if not state:
        raise RuntimeError("no active input()")
    return state.filelineno()
def isfirstline():
    """Return true iff the line just read is the first line of its file.

    Delegates to the module-wide instance; raises RuntimeError when
    input() has not installed one.
    """
    state = _state
    if not state:
        raise RuntimeError("no active input()")
    return state.isfirstline()
def isstdin():
    """Return true iff the line just read came from sys.stdin.

    Delegates to the module-wide instance; raises RuntimeError when
    input() has not installed one.
    """
    state = _state
    if not state:
        raise RuntimeError("no active input()")
    return state.isstdin()
class FileInput:
    """Read the lines of a sequence of files as if they were one stream.

    Constructor arguments:

    files   -- a single filename, a sequence of filenames, or None to use
               sys.argv[1:].  An empty list means standard input, and the
               name '-' also stands for sys.stdin.
    inplace -- if true, each file is renamed to a backup file and
               sys.stdout is redirected to a new file under the original
               name while that file is being read.  Not done for
               standard input.
    backup  -- extension of the backup file.  When empty, ".bak" is used
               and the backup is removed once the file has been closed
               successfully.
    bufsize -- size hint passed to readlines(); a false value selects
               DEFAULT_BUFSIZE.

    Lines are obtained either by calling readline() or by indexing the
    instance with consecutive integers starting at 0 (which is what a for
    loop does).  The two styles cannot be mixed.
    """

    def __init__(self, files=None, inplace=0, backup="", bufsize=0):
        # Set up every attribute that close()/nextfile() touch before doing
        # anything that can raise, so that __del__ is safe on a partially
        # constructed instance.
        self._files = ()
        self._inplace = inplace
        self._backup = backup
        self._savestdout = None     # real sys.stdout while redirected
        self._output = None         # in-place output file, if any
        self._filename = None       # name of the file last opened
        self._lineno = 0            # cumulative count of lines returned
        self._filelineno = 0        # count of lines returned from this file
        self._file = None           # file object currently being read
        self._isstdin = 0           # true while reading sys.stdin
        self._backupfilename = None
        self._buffer = []           # lines from the last readlines() call
        self._bufindex = 0          # index of the next line in _buffer
        self._bufsize = bufsize or DEFAULT_BUFSIZE

        if isinstance(files, type('')):
            files = (files,)
        else:
            if files is None:
                files = sys.argv[1:]
            if not files:
                files = ('-',)
            else:
                files = tuple(files)
        self._files = files

    def __del__(self):
        self.close()

    def close(self):
        """Close the current file and discard the remaining filenames."""
        try:
            self.nextfile()
        finally:
            # Drop the pending names even if closing failed, so a later
            # readline() cannot silently resume with the next file.
            self._files = ()

    def __getitem__(self, i):
        """Return line number i; i must equal the count of lines read so far.

        Raises RuntimeError for out-of-order access and IndexError at the
        end of input.
        """
        # Check the index before looking at the buffer: otherwise an
        # out-of-order access is silently accepted whenever buffered lines
        # happen to be available.
        if i != self._lineno:
            raise RuntimeError("accessing lines out of order")
        line = self.readline()
        if not line:
            raise IndexError("end of input reached")
        return line

    def nextfile(self):
        """Close the current file; the next read starts with the next file.

        Restores sys.stdout and closes the in-place output file when
        in-place filtering is active.  Has no effect when no file is open.
        The filename and the line counters are left unchanged.
        """
        savestdout = self._savestdout
        self._savestdout = 0
        if savestdout:
            sys.stdout = savestdout

        # Capture, then reset, all per-file state up front so the instance
        # is consistent even if one of the close() calls below raises.
        output = self._output
        infile = self._file
        was_stdin = self._isstdin
        backupfilename = self._backupfilename
        self._output = 0
        self._file = 0
        self._backupfilename = 0
        self._isstdin = 0
        self._buffer = []
        self._bufindex = 0

        try:
            if output:
                output.close()
        finally:
            # Never close sys.stdin; it does not belong to us.
            if infile and not was_stdin:
                infile.close()

        # Only reached when both files were closed successfully: if the
        # rewritten file could not be flushed, the backup is kept.
        if backupfilename and not self._backup:
            try:
                os.unlink(backupfilename)
            except OSError:
                pass

    def readline(self):
        """Return the next line of input, or "" when all files are exhausted.

        May raise IOError (or OSError for in-place setup) when a file
        cannot be opened or read.
        """
        # A loop rather than recursion: a long run of empty files must not
        # consume one stack frame per file.
        while 1:
            try:
                line = self._buffer[self._bufindex]
            except IndexError:
                pass
            else:
                self._bufindex += 1
                self._lineno += 1
                self._filelineno += 1
                return line
            if not self._file:
                if not self._files:
                    return ""
                self._open_next_file()
            self._buffer = self._file.readlines(self._bufsize)
            self._bufindex = 0
            if not self._buffer:
                # Current file is exhausted (or empty); move on.
                self.nextfile()

    def _open_next_file(self):
        """Take the next name off the pending list and open it for reading."""
        self._filename = self._files[0]
        self._files = self._files[1:]
        self._filelineno = 0
        self._file = None
        self._isstdin = 0
        self._backupfilename = 0
        if self._filename == '-':
            self._filename = '<stdin>'
            self._file = sys.stdin
            self._isstdin = 1
        elif self._inplace:
            self._open_inplace()
        else:
            # This may raise IOError
            self._file = open(self._filename, "r")

    def _open_inplace(self):
        """Move the current file to its backup name and redirect sys.stdout."""
        self._backupfilename = self._filename + (self._backup or ".bak")
        try:
            os.unlink(self._backupfilename)
        except OSError:
            pass
        # The next few lines may raise IOError
        os.rename(self._filename, self._backupfilename)
        self._file = open(self._backupfilename, "r")
        try:
            perm = os.fstat(self._file.fileno())[stat.ST_MODE]
        except (AttributeError, OSError):
            # No usable fstat(): fall back to default permissions.
            self._output = open(self._filename, "w")
        else:
            # Recreate the file with the permission bits of the original.
            fd = os.open(self._filename,
                         os.O_CREAT | os.O_WRONLY | os.O_TRUNC,
                         perm)
            self._output = os.fdopen(fd, "w")
            try:
                # os.open() is subject to the umask; chmod is best effort.
                os.chmod(self._filename, perm)
            except (AttributeError, OSError):
                pass
        self._savestdout = sys.stdout
        sys.stdout = self._output

    def filename(self):
        """Return the name of the file last opened (None before any read)."""
        return self._filename

    def lineno(self):
        """Return the cumulative number of lines returned so far."""
        return self._lineno

    def filelineno(self):
        """Return the number of lines returned from the current file."""
        return self._filelineno

    def isfirstline(self):
        """Return true iff the line just read was the first of its file."""
        return self._filelineno == 1

    def isstdin(self):
        """Return true iff the current file is sys.stdin."""
        return self._isstdin
def _test():
    """Command-line self-test: echo every input line with its position.

    Options: -i enables in-place filtering, -b EXT sets the backup
    extension.  Remaining arguments are the files to read.
    """
    import getopt
    inplace = 0
    backup = 0
    opts, args = getopt.getopt(sys.argv[1:], "ib:")
    for flag, value in opts:
        if flag == '-i':
            inplace = 1
        if flag == '-b':
            backup = value
    for line in input(args, inplace=inplace, backup=backup):
        # Drop one trailing newline, then one trailing carriage return.
        for terminator in ('\n', '\r'):
            if line[-1:] == terminator:
                line = line[:-1]
        if isfirstline():
            marker = "*"
        else:
            marker = ""
        # sys.stdout is looked up on every write because in-place mode
        # rebinds it while a file is being processed.
        sys.stdout.write("%d: %s[%d]%s %s" % (lineno(), filename(),
                                              filelineno(), marker, line)
                         + "\n")
    sys.stdout.write("%d: %s[%d]" % (lineno(), filename(), filelineno())
                     + "\n")
# Run the command-line self-test when this file is executed as a script.
if __name__ == '__main__':
    _test()