This commit was manufactured by cvs2svn to create tag 'cnrisync'.
[python/dscho.git] / Lib / pipes.py
blob0ae0b8c9edd2deb6f7615b9b594a4af6df9d7505
1 # Conversion pipeline templates
2 # =============================
5 # The problem:
6 # ------------
7 #
8 # Suppose you have some data that you want to convert to another format
9 # (e.g. from GIF image format to PPM image format). Maybe the
10 # conversion involves several steps (e.g. piping it through compress or
11 # uuencode). Some of the conversion steps may require that their input
12 # is a disk file, others may be able to read standard input; similar for
13 # their output. The input to the entire conversion may also be read
14 # from a disk file or from an open file, and similar for its output.
16 # The module lets you construct a pipeline template by sticking one or
17 # more conversion steps together. It will take care of creating and
18 # removing temporary files if they are necessary to hold intermediate
19 # data. You can then use the template to do conversions from many
20 # different sources to many different destinations. The temporary
21 # file names used are different each time the template is used.
23 # The templates are objects so you can create templates for many
24 # different conversion steps and store them in a dictionary, for
25 # instance.
28 # Directions:
29 # -----------
31 # To create a template:
32 # t = Template()
34 # To add a conversion step to a template:
35 # t.append(command, kind)
36 # where kind is a string of two characters: the first is '-' if the
37 # command reads its standard input or 'f' if it requires a file; the
38 # second likewise for the output. The command must be valid /bin/sh
39 # syntax. If input or output files are required, they are passed as
40 # $IN and $OUT; otherwise, it must be possible to use the command in
41 # a pipeline.
43 # To add a conversion step at the beginning:
44 # t.prepend(command, kind)
46 # To convert a file to another file using a template:
47 # sts = t.copy(infile, outfile)
48 # If infile or outfile are the empty string, standard input is read or
49 # standard output is written, respectively. The return value is the
50 # exit status of the conversion pipeline.
52 # To open a file for reading or writing through a conversion pipeline:
53 # fp = t.open(file, mode)
54 # where mode is 'r' to read the file, or 'w' to write it -- just like
55 # for the built-in function open() or for os.popen().
57 # To create a new template object initialized to a given one:
58 # t2 = t.clone()
60 # For an example, see the function test() at the end of the file.
63 import sys
64 import regex
66 import os
67 import tempfile
68 import string
# Conversion step kinds

FILEIN_FILEOUT = 'ff'   # Must read & write real files
STDIN_FILEOUT = '-f'    # Must write a real file
FILEIN_STDOUT = 'f-'    # Must read a real file
STDIN_STDOUT = '--'     # Normal pipeline element
SOURCE = '.-'           # Must be first, writes stdout
SINK = '-.'             # Must be last, reads stdin

stepkinds = [FILEIN_FILEOUT, STDIN_FILEOUT, FILEIN_STDOUT, STDIN_STDOUT,
             SOURCE, SINK]


def _uses_variable(cmd, name):
    """Return true if cmd mentions the shell variable $name as a whole word.

    '$IN' and '$IN.gif' count as uses of IN; '$INPUT' does not.
    """
    import re
    return re.search(r'\$' + name + r'\b', cmd) is not None


# A pipeline template is a Template object:

class Template:
    """A reusable description of a shell conversion pipeline.

    Attributes:
        steps     -- list of (cmd, kind) tuples, in pipeline order
        debugging -- when true, makepipeline() prints the generated
                     command and prefixes it with 'set -x; '
    """

    def __init__(self):
        """Template() returns a fresh pipeline template."""
        self.debugging = 0
        self.reset()

    def __repr__(self):
        """t.__repr__() implements repr(t)."""
        return '<Template instance, steps=' + repr(self.steps) + '>'

    def reset(self):
        """t.reset() restores a pipeline template to its initial state."""
        self.steps = []

    def clone(self):
        """t.clone() returns a new template with the same steps and
        debugging flag; the step list is copied, not shared."""
        t = Template()
        t.steps = self.steps[:]
        t.debugging = self.debugging
        return t

    def debug(self, flag):
        """t.debug(flag) turns debugging on or off."""
        self.debugging = flag

    def append(self, cmd, kind):
        """t.append(cmd, kind) adds a new step at the end.

        Raises TypeError if cmd is not a string.  Raises ValueError if
        kind is not one of stepkinds, if kind is SOURCE, if the template
        already ends with a SINK step, or if a file-based kind is used
        without the matching $IN / $OUT variable in cmd.
        """
        if not isinstance(cmd, str):
            raise TypeError('Template.append: cmd must be a string')
        if kind not in stepkinds:
            raise ValueError('Template.append: bad kind ' + repr(kind))
        if kind == SOURCE:
            raise ValueError('Template.append: SOURCE can only be prepended')
        if self.steps and self.steps[-1][1] == SINK:
            raise ValueError('Template.append: already ends with SINK')
        # Whole-word match, same rule as prepend(): '$INPUT' is not '$IN'.
        if kind[0] == 'f' and not _uses_variable(cmd, 'IN'):
            raise ValueError('Template.append: missing $IN in cmd')
        if kind[1] == 'f' and not _uses_variable(cmd, 'OUT'):
            raise ValueError('Template.append: missing $OUT in cmd')
        self.steps.append((cmd, kind))

    def prepend(self, cmd, kind):
        """t.prepend(cmd, kind) adds a new step at the front.

        Raises TypeError if cmd is not a string.  Raises ValueError if
        kind is not one of stepkinds, if kind is SINK, if the template
        already begins with a SOURCE step, or if a file-based kind is
        used without the matching $IN / $OUT variable in cmd.
        """
        if not isinstance(cmd, str):
            raise TypeError('Template.prepend: cmd must be a string')
        if kind not in stepkinds:
            raise ValueError('Template.prepend: bad kind ' + repr(kind))
        if kind == SINK:
            raise ValueError('Template.prepend: SINK can only be appended')
        if self.steps and self.steps[0][1] == SOURCE:
            raise ValueError('Template.prepend: already begins with SOURCE')
        if kind[0] == 'f' and not _uses_variable(cmd, 'IN'):
            raise ValueError('Template.prepend: missing $IN in cmd')
        if kind[1] == 'f' and not _uses_variable(cmd, 'OUT'):
            raise ValueError('Template.prepend: missing $OUT in cmd')
        self.steps.insert(0, (cmd, kind))

    def open(self, file, rw):
        """t.open(file, rw) returns a pipe or file object open for
        reading or writing; the file is the other end of the pipeline.

        rw must be 'r' or 'w'; anything else raises ValueError.
        """
        if rw == 'r':
            return self.open_r(file)
        if rw == 'w':
            return self.open_w(file)
        raise ValueError(
            'Template.open: rw must be \'r\' or \'w\', not ' + repr(rw))

    # t.open_r(file) and t.open_w(file) implement
    # t.open(file, 'r') and t.open(file, 'w') respectively

    def open_r(self, file):
        """Return an object for reading file through the pipeline.

        With no steps this is a plain open(file, 'r'); otherwise it is
        an os.popen() pipe.  Raises ValueError if the last step is a SINK.
        """
        if not self.steps:
            # 'open' here is the built-in, not the method above.
            return open(file, 'r')
        if self.steps[-1][1] == SINK:
            raise ValueError('Template.open_r: pipeline ends with SINK')
        cmd = self.makepipeline(file, '')
        return os.popen(cmd, 'r')

    def open_w(self, file):
        """Return an object for writing file through the pipeline.

        With no steps this is a plain open(file, 'w'); otherwise it is
        an os.popen() pipe.  Raises ValueError if the first step is a
        SOURCE.
        """
        if not self.steps:
            return open(file, 'w')
        if self.steps[0][1] == SOURCE:
            raise ValueError('Template.open_w: pipeline begins with SOURCE')
        cmd = self.makepipeline('', file)
        return os.popen(cmd, 'w')

    def copy(self, infile, outfile):
        """Run the pipeline from infile to outfile with os.system() and
        return its result."""
        return os.system(self.makepipeline(infile, outfile))

    def makepipeline(self, infile, outfile):
        """Return the shell command text for this template.

        Delegates to the module-level makepipeline() function.  When
        debugging is on, the command is printed and 'set -x; ' is put
        in front of it.
        """
        cmd = makepipeline(infile, self.steps, outfile)
        if self.debugging:
            print(cmd)
            cmd = 'set -x; ' + cmd
        return cmd
def makepipeline(infile, steps, outfile):
    """Return /bin/sh command text that runs steps from infile to outfile.

    infile and outfile are file names, or '' for standard input /
    standard output.  steps is a sequence of (cmd, kind) pairs, where
    kind is a two-character string: its first character describes the
    command's input and its second its output ('f' means it needs a
    real file passed as $IN / $OUT, '-' means stdin / stdout).

    Adjacent stages are joined with a pipe when no file is needed
    between them; otherwise a temporary file is created and the stages
    run one after the other.  When temporary files are used the text
    also contains a trap and a final 'rm -f' that delete them.

    The temporary files are created here, with tempfile.mkstemp(), so
    that their names cannot be claimed by someone else before the shell
    runs.  They exist (empty) from this call until the returned command
    removes them; if the command is never executed they are left behind.
    """
    # One mutable record per stage:
    # [input filename or '', command string, kind, output filename or '']
    stages = [['', cmd, kind, ''] for cmd, kind in steps]

    # Make sure there is at least one step
    if not stages:
        stages.append(['', 'cat', '--', ''])

    # Take care of the input and output ends: a file-only stage at
    # either end gets a 'cat' neighbour when the data comes from stdin
    # or goes to stdout.
    if stages[0][2][0] == 'f' and not infile:
        stages.insert(0, ['', 'cat', '--', ''])
    stages[0][0] = infile

    if stages[-1][2][1] == 'f' and not outfile:
        stages.append(['', 'cat', '--', ''])
    stages[-1][-1] = outfile

    # Invent temporary files to connect stages that need files
    garbage = []
    for left, right in zip(stages[:-1], stages[1:]):
        if left[2][1] == 'f' or right[2][0] == 'f':
            fd, temp = tempfile.mkstemp()
            os.close(fd)    # only the name is needed; the shell reopens it
            garbage.append(temp)
            left[-1] = right[0] = temp

    # Attach variable assignments and redirections to each command
    for stage in stages:
        inf, cmd, kind, outf = stage
        if kind[1] == 'f':
            cmd = 'OUT=' + quote(outf) + '; ' + cmd
        if kind[0] == 'f':
            cmd = 'IN=' + quote(inf) + '; ' + cmd
        if kind[0] == '-' and inf:
            cmd = cmd + ' <' + quote(inf)
        if kind[1] == '-' and outf:
            cmd = cmd + ' >' + quote(outf)
        stage[1] = cmd

    # Join the stages: a stage with no input file reads from a pipe,
    # one with an input file starts on a new line.
    cmdlist = stages[0][1]
    for stage in stages[1:]:
        cmd, kind = stage[1:3]
        if stage[0] == '':
            if 'f' in kind:
                # Group 'VAR=...; cmd' so the whole thing is one
                # pipeline element.
                cmd = '{ ' + cmd + '; }'
            cmdlist = cmdlist + ' |\n' + cmd
        else:
            cmdlist = cmdlist + '\n' + cmd

    if garbage:
        rmcmd = 'rm -f'
        for temp in garbage:
            rmcmd = rmcmd + ' ' + quote(temp)
        trapcmd = 'trap ' + quote(rmcmd + '; exit') + ' 1 2 3 13 14 15'
        cmdlist = trapcmd + '\n' + cmdlist + '\n' + rmcmd

    return cmdlist
# Reliably quote a string as a single argument for /bin/sh

# Characters that never need quoting.  ASCII letters only: the set must
# not vary with the locale, since it decides what is passed to the shell
# unquoted.
_safechars = string.ascii_letters + string.digits + '!@%_-+=:,./'
_funnychars = '"`$\\'   # Unsafe inside "double quotes"

def quote(file):
    """Return file quoted so that /bin/sh reads it as one argument.

    - The empty string becomes '' (two single quotes); returned bare it
      would vanish from the command line.
    - A string made only of safe characters is returned unchanged.
    - A string without a single quote is wrapped in single quotes.
    - Anything else is wrapped in double quotes, with a backslash put in
      front of each of the characters " ` $ and backslash.
    """
    if not file:
        return "''"
    for c in file:
        if c not in _safechars:
            break
    else:
        # Loop finished without a break: nothing needs quoting.
        return file
    if '\'' not in file:
        return '\'' + file + '\''
    res = ''
    for c in file:
        if c in _funnychars:
            c = '\\' + c
        res = res + c
    return '"' + res + '"'
# Small test program and example

def test():
    """Build a four-step example template and run it once.

    Prints 'Testing...', appends the steps, turns debugging on, copies
    the sample image to '@temp' through the pipeline, then prints
    'Done.'.
    """
    print('Testing...')
    pipeline = Template()
    example_steps = (
        ('togif $IN $OUT', 'ff'),
        ('giftoppm', '--'),
        ('ppmtogif >$OUT', '-f'),
        ('fromgif $IN $OUT', 'ff'),
    )
    for command, kind in example_steps:
        pipeline.append(command, kind)
    pipeline.debug(1)
    source = '/usr/local/images/rgb/rogues/guido.rgb'
    pipeline.copy(source, '@temp')
    print('Done.')