4 # - UTF-8 filenames are now allowed (Eli Golovinsky)
5 # - A file object is no longer mandatory; the object only needs seek() and read() attributes (Eli Golovinsky)
8 # - upload is now done in chunks (Adam Ambrose)
12 # bug fix: kosh @T aesaeion.com
13 # HTTPS support : Ryan Grow <ryangrow @T yahoo.com>
15 # Copyright (C) 2004,2005,2006 Fabien SEISEN
17 # This library is free software; you can redistribute it and/or
18 # modify it under the terms of the GNU Lesser General Public
19 # License as published by the Free Software Foundation; either
20 # version 2.1 of the License, or (at your option) any later version.
22 # This library is distributed in the hope that it will be useful,
23 # but WITHOUT ANY WARRANTY; without even the implied warranty of
24 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
25 # Lesser General Public License for more details.
27 # You should have received a copy of the GNU Lesser General Public
28 # License along with this library; if not, write to the Free Software
29 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
31 # you can contact me at: <fabien@seisen.org>
32 # http://fabien.seisen.org/python/
34 # Also modified by Adam Ambrose (aambrose @T pacbell.net) to write data in
35 # chunks (hardcoded to CHUNK_SIZE for now), so the entire contents of the file
36 # don't need to be kept in memory.
39 Enables uploading files using multipart/form-data
42 upload files in python:
43 http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/146306
45 timeoutsocket.py: overriding Python socket API:
46 http://www.timo-tasi.org/python/timeoutsocket.py
47 http://mail.python.org/pipermail/python-announce-list/2001-December/001095.html
51 u = urllib2.urlopen('http://site.com/path' [, data])
53 data can be a mapping object or a sequence of two-elements tuples
54 (like in original urllib2.urlopen())
55 varname still needs to be a string and
56 value can be a string or a file object
def get_content_type(filename):
    """Guess the MIME type of *filename* from its name.

    Returns the type reported by ``mimetypes.guess_type``. When no type
    can be guessed (the first element of the result is None), falls back
    to 'application/octet-stream'.
    """
    return mimetypes.guess_type(filename)[0] or 'application/octet-stream'
83 # if sock is None, just return the estimated size
84 def send_data(v_vars
, v_files
, boundary
, sock
=None):
88 buffer += '--%s\r\n' % boundary
89 buffer += 'Content-Disposition: form-data; name="%s"\r\n' % k
95 for (k
, v
) in v_files
:
97 file_size
= os
.fstat(fd
.fileno())[stat
.ST_SIZE
]
98 name
= fd
.name
.split('/')[-1]
99 if isinstance(name
, unicode):
100 name
= name
.encode('UTF-8')
102 buffer += '--%s\r\n' % boundary
103 buffer += 'Content-Disposition: form-data; name="%s"; filename="%s"\r\n' \
105 buffer += 'Content-Type: %s\r\n' % get_content_type(name
)
106 buffer += 'Content-Length: %s\r\n' % file_size
112 if hasattr(fd
, 'seek'):
115 chunk
= fd
.read(CHUNK_SIZE
)
121 buffer += '--%s--\r\n' % boundary
128 # mainly a copy of HTTPHandler from urllib2
129 class newHTTPHandler(urllib2
.BaseHandler
):
130 def http_open(self
, req
):
131 return self
.do_open(httplib
.HTTP
, req
)
133 def do_open(self
, http_class
, req
):
134 data
= req
.get_data()
137 # mapping object (dict)
138 if req
.has_data() and type(data
) != str:
139 if hasattr(data
, 'items'):
143 if len(data
) and not isinstance(data
[0], tuple):
146 ty
, va
, tb
= sys
.exc_info()
147 raise TypeError, "not a valid non-string sequence or mapping object", tb
150 if hasattr(v
, 'read'):
151 v_files
.append((k
, v
))
153 v_vars
.append( (k
, v
) )
154 # no file ? convert to string
155 if len(v_vars
) > 0 and len(v_files
) == 0:
156 data
= urllib
.urlencode(v_vars
)
159 host
= req
.get_host()
161 raise urllib2
.URLError('no host given')
163 h
= http_class(host
) # will parse host:port
165 h
.putrequest('POST', req
.get_selector())
166 if not 'Content-type' in req
.headers
:
168 boundary
= mimetools
.choose_boundary()
169 l
= send_data(v_vars
, v_files
, boundary
)
170 h
.putheader('Content-Type',
171 'multipart/form-data; boundary=%s' % boundary
)
172 h
.putheader('Content-length', str(l
))
174 h
.putheader('Content-type',
175 'application/x-www-form-urlencoded')
176 if not 'Content-length' in req
.headers
:
177 h
.putheader('Content-length', '%d' % len(data
))
179 h
.putrequest('GET', req
.get_selector())
181 scheme
, sel
= urllib
.splittype(req
.get_selector())
182 sel_host
, sel_path
= urllib
.splithost(sel
)
183 h
.putheader('Host', sel_host
or host
)
184 for name
, value
in self
.parent
.addheaders
:
185 name
= name
.capitalize()
186 if name
not in req
.headers
:
187 h
.putheader(name
, value
)
188 for k
, v
in req
.headers
.items():
190 # httplib will attempt to connect() here. be prepared
191 # to convert a socket error to a URLError.
194 except socket
.error
, err
:
195 raise urllib2
.URLError(err
)
199 l
= send_data(v_vars
, v_files
, boundary
, h
)
200 elif len(v_vars
) > 0:
201 # if data is passed as dict ...
202 data
= urllib
.urlencode(v_vars
)
205 # "normal" urllib2.urlopen()
208 code
, msg
, hdrs
= h
.getreply()
211 resp
= urllib
.addinfourl(fp
, hdrs
, req
.get_full_url())
216 return self
.parent
.error('http', req
, fp
, code
, msg
, hdrs
)
# Keep a reference to the stock handler under a private name, then rebind
# urllib2.HTTPHandler to the multipart-aware replacement defined in this file.
urllib2._old_HTTPHandler = urllib2.HTTPHandler
urllib2.HTTPHandler = newHTTPHandler
class newHTTPSHandler(newHTTPHandler):
    """HTTPS variant of newHTTPHandler: reuses its do_open with httplib.HTTPS."""

    def https_open(self, req):
        """Open *req* through the inherited do_open using httplib.HTTPS."""
        return self.do_open(httplib.HTTPS, req)
# Rebind urllib2.HTTPSHandler to the HTTPS handler defined in this file.
urllib2.HTTPSHandler = newHTTPSHandler
227 if __name__
== '__main__':
236 SYNTAX: %s -u url -f file [-v]
240 opts
, args
= getopt
.getopt(sys
.argv
[1:], 'hvu:f:')
241 except getopt
.GetoptError
, errmsg
:
242 print "ERROR:", errmsg
249 for name
, value
in opts
:
253 elif name
in ('-v',):
255 elif name
in ('-u',):
257 elif name
in ('-f',):
260 print "invalid argument:", name
274 fd
= open(v_file
, 'r')
278 # u = urllib2.urlopen(v_url, data)
279 req
= urllib2
.Request(v_url
, data
, {})
281 u
= urllib2
.urlopen(req
)
282 except urllib2
.HTTPError
, errobj
:
283 print "HTTPError:", errobj
.code