Quick update to the README file. For intros and books we now point to
[python/dscho.git] / Lib / fileinput.py
blob8f73fad222f2cd6dbe28a149cef29ef4c8016d13
"""Helper class to quickly write a loop over all standard input files.

Typical use is:

    import fileinput
    for line in fileinput.input():
        process(line)

This iterates over the lines of all files listed in sys.argv[1:],
defaulting to sys.stdin if the list is empty. If a filename is '-' it
is also replaced by sys.stdin. To specify an alternative list of
filenames, pass it as the argument to input(). A single file name is
also allowed.

Functions filename(), lineno() return the filename and cumulative line
number of the line that has just been read; filelineno() returns its
line number in the current file; isfirstline() returns true iff the
line just read is the first line of its file; isstdin() returns true
iff the line was read from sys.stdin. Function nextfile() closes the
current file so that the next iteration will read the first line from
the next file (if any); lines not read from the file will not count
towards the cumulative line count; the filename is not changed until
after the first line of the next file has been read. Function close()
closes the sequence.

Before any lines have been read, filename() returns None and both line
numbers are zero; nextfile() has no effect. After all lines have been
read, filename() and the line number functions return the values
pertaining to the last line read; nextfile() has no effect.

All files are opened in text mode. If an I/O error occurs during
opening or reading a file, the IOError exception is raised.

If sys.stdin is used more than once, the second and further use will
return no lines, except perhaps for interactive use, or if it has been
explicitly reset (e.g. using sys.stdin.seek(0)).

Empty files are opened and immediately closed; the only time their
presence in the list of filenames is noticeable at all is when the
last file opened is empty.

It is possible that the last line of a file doesn't end in a newline
character; otherwise lines are returned including the trailing
newline.

Class FileInput is the implementation; its methods filename(),
lineno(), filelineno(), isfirstline(), isstdin(), nextfile() and close()
correspond to the functions in the module. In addition it has a
readline() method which returns the next input line, and a
__getitem__() method which implements the sequence behavior. The
sequence must be accessed in strictly sequential order; sequence
access and readline() cannot be mixed.

Optional in-place filtering: if the keyword argument inplace=1 is
passed to input() or to the FileInput constructor, the file is moved
to a backup file and standard output is directed to the input file.
This makes it possible to write a filter that rewrites its input file
in place. If the keyword argument backup=".<some extension>" is also
given, it specifies the extension for the backup file, and the backup
file remains around; by default, the extension is ".bak" and it is
deleted when the output file is closed. In-place filtering is
disabled when standard input is read. XXX The current implementation
does not work for MS-DOS 8+3 filesystems.

XXX Possible additions:

- optional getopt argument processing
- specify open mode ('r' or 'rb')
- specify buffer size
- fileno()
- isatty()
- read(), read(size), even readlines()

"""
76 import sys, os, stat
# FileInput instance shared by the module-level functions below; input()
# creates it and close() resets it to None.
_state = None
def input(files=(), inplace=0, backup=""):
    """Create the FileInput instance used by the module-level functions.

    The arguments are handed unchanged to the FileInput constructor and
    the new instance is returned.  Raises RuntimeError if the instance
    from an earlier call still has a file open.
    """
    global _state
    if not (_state and _state._file):
        _state = FileInput(files, inplace, backup)
        return _state
    raise RuntimeError("input() already active")
def close():
    """Close the sequence started by input(), if there is one.

    The module-level instance is forgotten before it is closed, so the
    module is left without an active input() afterwards.
    """
    global _state
    # Detach the shared instance first, then close whatever was attached.
    previous, _state = _state, None
    if previous:
        previous.close()
def nextfile():
    """Close the current file so that reading continues with the next one.

    Delegates to the nextfile() method of the instance created by input().
    Raises RuntimeError if there is no active input().
    """
    if _state:
        return _state.nextfile()
    raise RuntimeError("no active input()")
def filename():
    """Return the name of the file the last line was read from.

    Delegates to the filename() method of the instance created by input().
    Raises RuntimeError if there is no active input().
    """
    if _state:
        return _state.filename()
    raise RuntimeError("no active input()")
def lineno():
    """Return the cumulative line number of the line that was just read.

    Delegates to the lineno() method of the instance created by input().
    Raises RuntimeError if there is no active input().
    """
    if _state:
        return _state.lineno()
    raise RuntimeError("no active input()")
def filelineno():
    """Return the line number, within the current file, of the last line read.

    Delegates to the filelineno() method of the instance created by
    input().  Raises RuntimeError if there is no active input().
    """
    if _state:
        return _state.filelineno()
    raise RuntimeError("no active input()")
def isfirstline():
    """Return true iff the line just read is the first line of its file.

    Delegates to the isfirstline() method of the instance created by
    input().  Raises RuntimeError if there is no active input().
    """
    if _state:
        return _state.isfirstline()
    raise RuntimeError("no active input()")
def isstdin():
    """Return true iff the line just read came from sys.stdin.

    Delegates to the isstdin() method of the instance created by input().
    Raises RuntimeError if there is no active input().
    """
    if _state:
        return _state.isstdin()
    raise RuntimeError("no active input()")
class FileInput:
    """Read the lines of a sequence of files as one continuous stream.

    Constructor arguments:

    files   -- a single file name or a sequence of file names.  When it
               is empty, sys.argv[1:] is used; when that is empty too,
               ('-',) is used.  The name '-' stands for sys.stdin.
    inplace -- when true, every named file is moved to a backup file
               while it is being read and sys.stdout is redirected to a
               new file carrying the original name.
    backup  -- extension of the backup file for in-place filtering.  When
               it is empty, ".bak" is used and the backup is removed once
               the file has been processed.

    Lines are obtained either with readline() or by indexing the instance
    with the strictly sequential indices 0, 1, 2, ...; the two access
    styles cannot be mixed.
    """

    def __init__(self, files=(), inplace=0, backup=""):
        # Set up all per-file state before touching `files`, so that
        # __del__ -> close() finds every attribute even when converting
        # `files` raises.
        self._files = ()
        self._inplace = inplace
        self._backup = backup
        self._savestdout = None
        self._output = None
        self._filename = None
        self._lineno = 0
        self._filelineno = 0
        self._file = None
        self._isstdin = 0
        self._backupfilename = None

        # A single file name is shorthand for a one-element sequence.
        if isinstance(files, str):
            files = (files,)
        else:
            files = tuple(files)
        if not files:
            files = tuple(sys.argv[1:])
        if not files:
            files = ('-',)
        self._files = files

    def __del__(self):
        self.close()

    def close(self):
        """Close the current file and drop all file names still pending."""
        try:
            self.nextfile()
        finally:
            self._files = ()

    def __getitem__(self, i):
        """Return line number i; i must equal the count of lines read so far.

        Raises RuntimeError for any other index and IndexError once the
        input is exhausted.
        """
        if i != self._lineno:
            raise RuntimeError("accessing lines out of order")
        line = self.readline()
        if not line:
            raise IndexError("end of input reached")
        return line

    def nextfile(self):
        """Finish the current file so the next read starts the next file.

        Restores sys.stdout, closes the in-place output file and the input
        file (sys.stdin is never closed) and removes the backup file
        unless a backup extension was requested.  Does nothing when no
        file is open.  Each step runs even if an earlier one raises.
        """
        savestdout = self._savestdout
        self._savestdout = None
        if savestdout is not None:
            sys.stdout = savestdout

        output = self._output
        self._output = None
        try:
            if output is not None:
                output.close()
        finally:
            current = self._file
            self._file = None
            try:
                if current is not None and not self._isstdin:
                    current.close()
            finally:
                backupfilename = self._backupfilename
                self._backupfilename = None
                if backupfilename and not self._backup:
                    # Best effort: a backup that cannot be removed is
                    # simply left behind.
                    try:
                        os.unlink(backupfilename)
                    except os.error:
                        pass
                self._isstdin = 0

    def readline(self):
        """Return the next line, or "" when every file has been read.

        Exhausted (or empty) files are skipped in a loop rather than by
        recursion, so a long run of empty files cannot overflow the stack.
        """
        while True:
            if self._file is None:
                if not self._files:
                    return ""
                self._open_next()
            line = self._file.readline()
            if line:
                self._lineno = self._lineno + 1
                self._filelineno = self._filelineno + 1
                return line
            self.nextfile()

    def _open_next(self):
        """Make the next pending file name current and open it for reading."""
        self._filename = self._files[0]
        self._files = self._files[1:]
        self._filelineno = 0
        self._file = None
        self._isstdin = 0
        self._backupfilename = None
        if self._filename == '-':
            # In-place filtering is not applied to standard input.
            self._filename = '<stdin>'
            self._file = sys.stdin
            self._isstdin = 1
        elif self._inplace:
            self._open_inplace()
        else:
            # This may raise IOError
            self._file = open(self._filename, "r")

    def _open_inplace(self):
        """Move the current file to its backup name and redirect sys.stdout."""
        self._backupfilename = self._filename + (self._backup or ".bak")
        # Remove a stale backup so the rename below cannot collide with it.
        try:
            os.unlink(self._backupfilename)
        except os.error:
            pass
        # The next few lines may raise os.error or IOError
        os.rename(self._filename, self._backupfilename)
        self._file = open(self._backupfilename, "r")
        try:
            perm = os.fstat(self._file.fileno())[stat.ST_MODE]
        except (AttributeError, os.error):
            # Mode bits unavailable: create the output with default ones.
            self._output = open(self._filename, "w")
        else:
            # Re-create the file with the mode bits of the original.
            fd = os.open(self._filename,
                         os.O_CREAT | os.O_WRONLY | os.O_TRUNC,
                         perm)
            self._output = os.fdopen(fd, "w")
            try:
                os.chmod(self._filename, perm)
            except (AttributeError, os.error):
                pass
        self._savestdout = sys.stdout
        sys.stdout = self._output

    def filename(self):
        """Return the name of the file most recently opened for reading."""
        return self._filename

    def lineno(self):
        """Return the number of lines read so far across all files."""
        return self._lineno

    def filelineno(self):
        """Return the number of lines read so far from the current file."""
        return self._filelineno

    def isfirstline(self):
        """Return true iff exactly one line has been read from the current file."""
        return self._filelineno == 1

    def isstdin(self):
        """Return true iff the file currently open is sys.stdin."""
        return self._isstdin
def _test():
    """Command-line self-test: print every input line with its numbers.

    Option -i turns on in-place filtering and -b EXT sets the backup
    extension; the remaining arguments are the files to read.
    """
    import getopt
    use_inplace = 0
    backup_ext = 0
    options, filenames = getopt.getopt(sys.argv[1:], "ib:")
    for flag, value in options:
        if flag == '-i':
            use_inplace = 1
        elif flag == '-b':
            backup_ext = value
    for line in input(filenames, inplace=use_inplace, backup=backup_ext):
        # Drop one trailing newline, then one trailing carriage return.
        if line.endswith('\n'):
            line = line[:-1]
        if line.endswith('\r'):
            line = line[:-1]
        # The first line of every file is flagged with an asterisk.
        if isfirstline():
            marker = "*"
        else:
            marker = ""
        report = "%d: %s[%d]%s %s" % (lineno(), filename(), filelineno(),
                                      marker, line)
        print(report)
    summary = "%d: %s[%d]" % (lineno(), filename(), filelineno())
    print(summary)
# Run the self-test when this file is executed as a script.
if __name__ == '__main__':
    _test()