3 The seek_wrapper code is not used if you're using UserAgent with
4 .set_seekable_responses(False), or if you're using the urllib2-level interface
5 without SeekableProcessor or HTTPEquivProcessor. Class closeable_response is
6 instantiated by some handlers (AbstractHTTPHandler), but the closeable_response
7 interface is only depended upon by Browser-level code. Function
8 upgrade_response is only used if you're using Browser or
9 ResponseUpgradeProcessor.
12 Copyright 2006 John J. Lee <jjl@pobox.com>
14 This code is free software; you can redistribute it and/or modify it
15 under the terms of the BSD or ZPL 2.1 licenses (see the file COPYING.txt
16 included with the distribution).
20 import copy
, mimetools
21 from cStringIO
import StringIO
24 # XXX Andrew Dalke kindly sent me a similar class in response to my request on
25 # comp.lang.python, which I then proceeded to lose. I wrote this class
26 # instead, but I think he's released his code publicly since, could pinch the
27 # tests from it, at least...
29 # For testing seek_wrapper invariant (note that
30 # test_urllib2.HandlerTest.test_seekable is expected to fail when this
31 # invariant checking is turned on). The invariant checking is done by module
# ipdbc, which is available here:
33 # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/436834
34 ## from ipdbc import ContractBase
35 ## class seek_wrapper(ContractBase):
37 """Adds a seek method to a file object.
39 This is only designed for seeking on readonly file-like objects.
41 Wrapped file-like object must have a read method. The readline method is
42 only supported if that method is present on the wrapped object. The
43 readlines method is always supported. xreadlines and iteration are
44 supported only for Python 2.2 and above.
48 wrapped: the wrapped file object
49 is_closed: true iff .close() has been called
51 WARNING: All other attributes of the wrapped object (ie. those that are not
52 one of wrapped, read, readline, readlines, xreadlines, __iter__ and next)
53 are passed through unaltered, which may or may not make sense for your
54 particular file object.
57 # General strategy is to check that cache is full enough, then delegate to
58 # the cache (self.__cache, which is a cStringIO.StringIO instance). A seek
59 # position (self.__pos) is maintained independently of the cache, in order
60 # that a single cache may be shared between multiple seek_wrapper objects.
61 # Copying using module copy shares the cache in this way.
63 def __init__(self
, wrapped
):
64 self
.wrapped
= wrapped
65 self
.__read
_complete
_state
= [False]
66 self
.__is
_closed
_state
= [False]
67 self
.__have
_readline
= hasattr(self
.wrapped
, "readline")
68 self
.__cache
= StringIO()
69 self
.__pos
= 0 # seek position
72 # The end of the cache is always at the same place as the end of the
74 return self
.wrapped
.tell() == len(self
.__cache
.getvalue())
80 def __getattr__(self
, name
):
81 if name
== "is_closed":
82 return self
.__is
_closed
_state
[0]
83 elif name
== "read_complete":
84 return self
.__read
_complete
_state
[0]
86 wrapped
= self
.__dict
__.get("wrapped")
88 return getattr(wrapped
, name
)
90 return getattr(self
.__class
__, name
)
92 def __setattr__(self
, name
, value
):
93 if name
== "is_closed":
94 self
.__is
_closed
_state
[0] = bool(value
)
95 elif name
== "read_complete":
96 if not self
.is_closed
:
97 self
.__read
_complete
_state
[0] = bool(value
)
99 self
.__dict
__[name
] = value
101 def seek(self
, offset
, whence
=0):
102 assert whence
in [0,1,2]
104 # how much data, if any, do we need to read?
105 if whence
== 2: # 2: relative to end of *wrapped* file
106 if offset
< 0: raise ValueError("negative seek offset")
107 # since we don't know yet where the end of that file is, we must
111 if whence
== 0: # 0: absolute
112 if offset
< 0: raise ValueError("negative seek offset")
114 else: # 1: relative to current position
117 raise ValueError("seek to before start of file")
119 end
= len(self
.__cache
.getvalue())
125 self
.__cache
.seek(0, 2)
128 self
.__cache
.write(self
.wrapped
.read())
129 self
.read_complete
= True
130 self
.__pos
= self
.__cache
.tell() - offset
132 data
= self
.wrapped
.read(to_read
)
134 self
.read_complete
= True
136 self
.__cache
.write(data
)
137 # Don't raise an exception even if we've seek()ed past the end
138 # of .wrapped, since fseek() doesn't complain in that case.
139 # Also like fseek(), pretend we have seek()ed past the end,
141 #self.__pos = self.__cache.tell()
151 cpy
= self
.__class
__(self
.wrapped
)
152 cpy
.__cache
= self
.__cache
153 cpy
.__read
_complete
_state
= self
.__read
_complete
_state
154 cpy
.__is
_closed
_state
= self
.__is
_closed
_state
165 def read(self
, size
=-1):
167 end
= len(self
.__cache
.getvalue())
168 available
= end
- pos
170 # enough data already cached?
171 if size
<= available
and size
!= -1:
172 self
.__cache
.seek(pos
)
173 self
.__pos
= pos
+size
174 return self
.__cache
.read(size
)
176 # no, so read sufficient data from wrapped file and cache it
177 self
.__cache
.seek(0, 2)
179 self
.__cache
.write(self
.wrapped
.read())
180 self
.read_complete
= True
182 to_read
= size
- available
184 data
= self
.wrapped
.read(to_read
)
186 self
.read_complete
= True
188 self
.__cache
.write(data
)
189 self
.__cache
.seek(pos
)
191 data
= self
.__cache
.read(size
)
192 self
.__pos
= self
.__cache
.tell()
193 assert self
.__pos
== pos
+ len(data
)
196 def readline(self
, size
=-1):
197 if not self
.__have
_readline
:
198 raise NotImplementedError("no readline method on wrapped object")
200 # line we're about to read might not be complete in the cache, so
201 # read another line first
203 self
.__cache
.seek(0, 2)
204 data
= self
.wrapped
.readline()
206 self
.read_complete
= True
208 self
.__cache
.write(data
)
209 self
.__cache
.seek(pos
)
211 data
= self
.__cache
.readline()
214 self
.__pos
= pos
+size
217 self
.__pos
= pos
+len(data
)
220 def readlines(self
, sizehint
=-1):
222 self
.__cache
.seek(0, 2)
223 self
.__cache
.write(self
.wrapped
.read())
224 self
.read_complete
= True
225 self
.__cache
.seek(pos
)
226 data
= self
.__cache
.readlines(sizehint
)
227 self
.__pos
= self
.__cache
.tell()
230 def __iter__(self
): return self
232 line
= self
.readline()
233 if line
== "": raise StopIteration
236 xreadlines
= __iter__
239 return ("<%s at %s whose wrapped object = %r>" %
240 (self
.__class
__.__name
__, hex(abs(id(self
))), self
.wrapped
))
243 class response_seek_wrapper(seek_wrapper
):
246 Supports copying response objects and setting response body data.
def __init__(self, wrapped):
    """Wrap a response object and keep a reference to its headers.

    wrapped: the response to wrap; it must provide an .info() method.
    """
    seek_wrapper.__init__(self, wrapped)
    # Ask the wrapped response for its headers once, and hold on to the
    # result.
    headers = self.wrapped.info()
    self._headers = headers
255 cpy
= seek_wrapper
.__copy
__(self
)
256 # copy headers from delegate
257 cpy
._headers
= copy
.copy(self
.info())
260 # Note that .info() and .geturl() (the only two urllib2 response methods
261 # that are not implemented by seek_wrapper) must be here explicitly rather
# than via seek_wrapper's __getattr__ delegation, so that the nasty
263 # dynamically-created HTTPError classes in get_seek_wrapper_class() get the
264 # wrapped object's implementation, and not HTTPError's.
270 return self
.wrapped
.geturl()
272 def set_data(self
, data
):
276 cache
= self
._seek
_wrapper
__cache
= StringIO()
282 # file-like object that always claims to be at end-of-file...
def read(self, size=-1):
    """Return "" whatever size is given: there is never any data left."""
    return ""

def readline(self, size=-1):
    """Return "" whatever size is given: there is never another line."""
    return ""

def __iter__(self):
    """Return self; the object is its own, always exhausted, iterator."""
    return self

def next(self):
    """Signal exhaustion, as a real file object does at end-of-file.

    Raises StopIteration unconditionally.  Returning "" here instead would
    make ``for line in obj`` loop forever, because iteration only stops
    when next() raises StopIteration.
    """
    raise StopIteration

def close(self):
    """Do nothing: there is no underlying resource to release."""
    pass
289 class eofresponse(eoffile
):
290 def __init__(self
, url
, headers
, code
, msg
):
292 self
._headers
= headers
295 def geturl(self
): return self
._url
296 def info(self
): return self
._headers
299 class closeable_response
:
300 """Avoids unnecessarily clobbering urllib.addinfourl methods on .close().
302 Only supports responses returned by mechanize.HTTPHandler.
304 After .close(), the following methods are supported:
314 and the following attributes are supported:
319 Also supports pickling (but the stdlib currently does something to prevent
320 it: http://python.org/sf/1144636).
# presence of this attr indicates the response is useable after .close()
324 closeable_response
= None
326 def __init__(self
, fp
, headers
, url
, code
, msg
):
328 self
._headers
= headers
333 def _set_fp(self
, fp
):
335 self
.read
= self
.fp
.read
336 self
.readline
= self
.fp
.readline
337 if hasattr(self
.fp
, "readlines"): self
.readlines
= self
.fp
.readlines
338 if hasattr(self
.fp
, "fileno"):
339 self
.fileno
= self
.fp
.fileno
341 self
.fileno
= lambda: None
342 self
.__iter
__ = self
.fp
.__iter
__
343 self
.next
= self
.fp
.next
346 return '<%s at %s whose fp = %r>' % (
347 self
.__class
__.__name
__, hex(abs(id(self
))), self
.fp
)
358 new_wrapped
= eofresponse(
359 self
._url
, self
._headers
, self
.code
, self
.msg
)
360 self
._set
_fp
(new_wrapped
)
362 def __getstate__(self
):
363 # There are three obvious options here:
366 # 3. close socket, pickle state including read position, then open
367 # again on unpickle and use Range header
368 # XXXX um, 4. refuse to pickle unless .close()d. This is better,
369 # actually ("errors should never pass silently"). Pickling doesn't
370 # work anyway ATM, because of http://python.org/sf/1144636 so fix
373 # 2 breaks pickle protocol, because one expects the original object
374 # to be left unscathed by pickling. 3 is too complicated and
375 # surprising (and too much work ;-) to happen in a sane __getstate__.
378 state
= self
.__dict
__.copy()
379 new_wrapped
= eofresponse(
380 self
._url
, self
._headers
, self
.code
, self
.msg
)
381 state
["wrapped"] = new_wrapped
def test_response(data='test data', headers=[],
                  url="http://example.com/", code=200, msg="OK"):
    """Return a response built by make_response() from the given arguments.

    Every argument is forwarded to make_response() unchanged; the defaults
    describe a small successful response for http://example.com/.
    """
    response = make_response(data, headers, url, code, msg)
    return response
def test_html_response(data='test data', headers=[],
                       url="http://example.com/", code=200, msg="OK"):
    """Return a make_response() response with an HTML Content-type header.

    headers: sequence of (name, value) pairs.  It is copied, never modified;
        a ("Content-type", "text/html") pair is appended to the copy.
    The remaining arguments are forwarded to make_response() unchanged.
    """
    # Build a fresh list instead of using +=, which would extend the caller's
    # list -- or the shared default list -- in place, so that every call
    # piled up one more Content-type header.
    html_headers = list(headers) + [("Content-type", "text/html")]
    return make_response(data, html_headers, url, code, msg)
def make_response(data, headers, url, code, msg):
    """Convenient factory for objects implementing response interface.

    data: string containing response body data
    headers: sequence of (name, value) pairs
    url: handed to closeable_response unchanged
    code: integer response code (e.g. 200)
    msg: string response code message (e.g. "OK")

    """
    parsed_headers = make_headers(headers)
    body = StringIO(data)
    # Wrap the body in a closeable_response first, then make it seekable.
    closeable = closeable_response(body, parsed_headers, url, code, msg)
    return response_seek_wrapper(closeable)
408 def make_headers(headers
):
410 headers: sequence of (name, value) pairs
413 for name_value
in headers
:
414 hdr_text
.append("%s: %s" % name_value
)
415 return mimetools
.Message(StringIO("\n".join(hdr_text
)))
# Rest of this module is especially horrible, but needed, at least until we
# fork urllib2.  Even then, we may want to preserve urllib2 compatibility.
421 def get_seek_wrapper_class(response
):
422 # in order to wrap response objects that are also exceptions, we must
423 # dynamically subclass the exception :-(((
424 if (isinstance(response
, urllib2
.HTTPError
) and
425 not hasattr(response
, "seek")):
426 if response
.__class
__.__module
__ == "__builtin__":
427 exc_class_name
= response
.__class
__.__name
__
429 exc_class_name
= "%s.%s" % (
430 response
.__class
__.__module
__, response
.__class
__.__name
__)
432 class httperror_seek_wrapper(response_seek_wrapper
, response
.__class
__):
433 # this only derives from HTTPError in order to be a subclass --
434 # the HTTPError behaviour comes from delegation
436 _exc_class_name
= exc_class_name
438 def __init__(self
, wrapped
):
439 response_seek_wrapper
.__init
__(self
, wrapped
)
440 # be compatible with undocumented HTTPError attributes :-(
441 self
.hdrs
= wrapped
.info()
442 self
.filename
= wrapped
.geturl()
446 "<%s (%s instance) at %s "
447 "whose wrapped object = %r>" % (
448 self
.__class
__.__name
__, self
._exc
_class
_name
,
449 hex(abs(id(self
))), self
.wrapped
)
451 wrapper_class
= httperror_seek_wrapper
453 wrapper_class
= response_seek_wrapper
456 def seek_wrapped_response(response
):
457 """Return a copy of response that supports seekable response interface.
459 Accepts responses from both mechanize and urllib2 handlers.
Copes with both ordinary response instances and HTTPError instances (which
462 can't be simply wrapped due to the requirement of preserving the exception
465 if not hasattr(response
, "seek"):
466 wrapper_class
= get_seek_wrapper_class(response
)
467 response
= wrapper_class(response
)
468 assert hasattr(response
, "get_data")
471 def upgrade_response(response
):
472 """Return a copy of response that supports Browser response interface.
474 Browser response interface is that of "seekable responses"
475 (response_seek_wrapper), plus the requirement that responses must be
476 useable after .close() (closeable_response).
478 Accepts responses from both mechanize and urllib2 handlers.
480 Copes with both ordinary response instances and HTTPError instances (which
481 can't be simply wrapped due to the requirement of preserving the exception
484 wrapper_class
= get_seek_wrapper_class(response
)
485 if hasattr(response
, "closeable_response"):
486 if not hasattr(response
, "seek"):
487 response
= wrapper_class(response
)
488 assert hasattr(response
, "get_data")
489 return copy
.copy(response
)
491 # a urllib2 handler constructed the response, i.e. the response is an
492 # urllib.addinfourl or a urllib2.HTTPError, instead of a
493 # _Util.closeable_response as returned by e.g. mechanize.HTTPHandler
496 except AttributeError:
500 except AttributeError:
503 # may have already-.read() data from .seek() cache
505 get_data
= getattr(response
, "get_data", None)
509 response
= closeable_response(
510 response
.fp
, response
.info(), response
.geturl(), code
, msg
)
511 response
= wrapper_class(response
)
513 response
.set_data(data
)