1 """Python backwards-compat., date/time routines, seekable file object wrapper.
3 Copyright 2002-2003 John J Lee <jjl@pobox.com>
5 This code is free software; you can redistribute it and/or modify it under
6 the terms of the BSD License (see the file COPYING included with the
16 import re
, string
, time
17 from types
import TupleType
18 from StringIO
import StringIO
21 from exceptions
import StopIteration
23 from ClientCookie
._ClientCookie
import StopIteration
def startswith(string, initial):
    """Report whether the sequence `string` begins with `initial`.

    Backwards-compatible stand-in for the `startswith` string method, for
    interpreters that do not provide it.
    """
    prefix_len = len(initial)
    if prefix_len > len(string):
        # A prefix longer than the whole sequence can never match.
        return False
    head = string[:prefix_len]
    return head == initial
def endswith(string, final):
    """Report whether the sequence `string` ends with `final`.

    Backwards-compatible stand-in for the `endswith` string method, for
    interpreters that do not provide it.  As with the built-in method, an
    empty `final` matches every `string`.
    """
    start = len(string) - len(final)
    if start < 0:
        # A suffix longer than the whole sequence can never match.
        return False
    # Slice from an explicit non-negative index.  A negative-index slice
    # breaks for an empty suffix: string[-0:] is the whole sequence, not
    # the empty tail.
    return string[start:] == final
33 def compat_issubclass(obj
, tuple_or_class
):
35 if type(tuple_or_class
) == TupleType
:
36 for klass
in tuple_or_class
:
37 if issubclass(obj
, klass
):
40 return issubclass(obj
, tuple_or_class
)
49 from calendar
import timegm
50 timegm((2045, 1, 1, 22, 23, 32)) # overflows in 2.1
52 # Number of days per month (except for February in leap years)
53 mdays
= [0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
55 # Return 1 for leap years, 0 for non-leap years
57 return year
% 4 == 0 and (year
% 100 <> 0 or year
% 400 == 0)
59 # Return number of leap years in range [y1, y2)
60 # Assume y1 <= y2 and no funny (non-leap century) years
62 return (y2
+3)/4 - (y1
+3)/4
66 """Unrelated but handy function to calculate Unix timestamp from GMT."""
67 year
, month
, day
, hour
, minute
, second
= tuple[:6]
69 assert 1 <= month
<= 12
70 days
= 365*(year
-EPOCH
) + leapdays(EPOCH
, year
)
71 for i
in range(1, month
):
72 days
= days
+ mdays
[i
]
73 if month
> 2 and isleap(year
):
76 hours
= days
*24 + hour
77 minutes
= hours
*60 + minute
78 seconds
= minutes
*60L + second
82 # Date/time conversion routines for formats used by the HTTP protocol.
86 year
, month
, mday
, hour
, min, sec
= tt
[:6]
87 if ((year
>= EPOCH
) and (1 <= month
<= 12) and (1 <= mday
<= 31) and
88 (0 <= hour
<= 24) and (0 <= min <= 59) and (0 <= sec
<= 61)):
93 days
= ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
94 months
= ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
95 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
97 for month
in months
: months_lower
.append(string
.lower(month
))
def time2isoz(t=None):
    """Format a time in seconds since the epoch as an ISO-style UTC string.

    When t is omitted (or None), the current time is used.

    The returned string looks like "YYYY-MM-DD hh:mm:ssZ" and represents
    Universal Time (UTC, aka GMT).  An example of this format is:

    1994-11-24 08:49:37Z

    """
    if t is None:
        t = time.time()
    # First six fields: year, month, day of month, hour, minute, second.
    fields = tuple(time.gmtime(t)[:6])
    return "%04d-%02d-%02d %02d:%02d:%02dZ" % fields
def time2netscape(t=None):
    """Format a time in seconds since the epoch as a Netscape cookie date.

    When t is omitted (or None), the current time is used.

    The format of the returned string is like this:

    Wdy, DD-Mon-YYYY HH:MM:SS GMT

    """
    if t is None:
        t = time.time()
    # `minute` rather than `min`, so the builtin is not shadowed.
    year, mon, mday, hour, minute, sec, wday = time.gmtime(t)[:7]
    # The weekday is followed by a comma, as the documented format requires;
    # `days` is indexed by gmtime's weekday (Monday == 0) and `months` by
    # the zero-based month.
    return "%s, %02d-%s-%04d %02d:%02d:%02d GMT" % (
        days[wday], mday, months[mon - 1], year, hour, minute, sec)
134 UTC_ZONES
= {"GMT": None, "UTC": None, "UT": None, "Z": None}
136 timezone_re
= re
.compile(r
"^([-+])?(\d\d?):?(\d\d)?$")
137 def offset_from_tz_string(tz
):
139 if UTC_ZONES
.has_key(tz
):
142 m
= timezone_re
.search(tz
)
144 offset
= 3600 * int(m
.group(2))
146 offset
= offset
+ 60 * int(m
.group(3))
147 if m
.group(1) == '-':
151 def _str2time(day
, mon
, yr
, hr
, min, sec
, tz
):
152 # translate month name to number
153 # month numbers start with 1 (January)
155 mon
= months_lower
.index(string
.lower(mon
))+1
157 # maybe it's already a number
167 # make sure clock elements are defined
168 if hr
is None: hr
= 0
169 if min is None: min = 0
170 if sec
is None: sec
= 0
179 # find "obvious" year
180 cur_yr
= time
.localtime(time
.time())[0]
186 if m
> 0: yr
= yr
+ 100
189 # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
190 t
= my_timegm((yr
, mon
, day
, hr
, min, sec
, tz
))
193 # adjust time using timezone string, to get absolute time since epoch
196 tz
= string
.upper(tz
)
197 offset
= offset_from_tz_string(tz
)
205 strict_re
= re
.compile(r
"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) (\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
206 wkday_re
= re
.compile(
207 r
"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re
.I
)
208 loose_http_re
= re
.compile(
216 (?:\s+|:) # separator before clock
217 (\d\d?):(\d\d) # hour:min
218 (?::(\d\d))? # optional seconds
221 ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
223 (?:\(\w+\))? # ASCII representation of timezone in parens.
226 """Returns time in seconds since epoch of time represented by a string.
228 Return value is an integer.
230 None is returned if the format of str is unrecognized, the time is outside
231 the representable range, or the timezone string is not recognized. The
232 time formats recognized are the same as for parse_date. If the string
233 contains no timezone, UTC is assumed.
235 The timezone in the string may be numerical (like "-0800" or "+0100") or a
236 string timezone (like "UTC", "GMT", "BST" or "EST"). Currently, only the
237 timezone strings equivalent to UTC (zero offset) are known to the function.
239 The function loosely parses the following formats:
241 Wed, 09 Feb 1994 22:23:32 GMT -- HTTP format
242 Tuesday, 08-Feb-94 14:15:29 GMT -- old rfc850 HTTP format
243 Tuesday, 08-Feb-1994 14:15:29 GMT -- broken rfc850 HTTP format
244 09 Feb 1994 22:23:32 GMT -- HTTP format (no weekday)
245 08-Feb-94 14:15:29 GMT -- rfc850 format (no weekday)
246 08-Feb-1994 14:15:29 GMT -- broken rfc850 format (no weekday)
248 The parser ignores leading and trailing whitespace. The time may be
251 If the year is given with only 2 digits, then parse_date will select the
252 century that makes the year closest to the current date.
255 # fast exit for strictly conforming string
256 m
= strict_re
.search(text
)
259 mon
= months_lower
.index(string
.lower(g
[1])) + 1
260 tt
= (int(g
[2]), mon
, int(g
[0]),
261 int(g
[3]), int(g
[4]), float(g
[5]))
264 # No, we need some messy parsing...
267 text
= string
.lstrip(text
)
268 text
= wkday_re
.sub("", text
, 1) # Useless weekday
270 # tz is time zone specifier string
271 day
, mon
, yr
, hr
, min, sec
, tz
= [None]*7
274 m
= loose_http_re
.search(text
)
276 day
, mon
, yr
, hr
, min, sec
, tz
= m
.groups()
278 return None # bad format
280 return _str2time(day
, mon
, yr
, hr
, min, sec
, tz
)
287 (\d\d?) # numerical month
291 (?:\s+|[-:Tt]) # separator before clock
292 (\d\d?):?(\d\d) # hour:min
293 (?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional)
296 ([-+]?\d\d?:?(:?\d\d)?
297 |Z|z)? # timezone (Z is "zero meridian", i.e. GMT)
301 As for httpstr2time, but parses the ISO 8601 formats:
303 1994-02-03 14:15:29 -0100 -- ISO 8601 format
304 1994-02-03 14:15:29 -- zone is optional
305 1994-02-03 -- only date
306 1994-02-03T14:15:29 -- Use T as separator
307 19940203T141529Z -- ISO 8601 compact format
308 19940203 -- only date
312 text
= string
.lstrip(text
)
314 # tz is time zone specifier string
315 day
, mon
, yr
, hr
, min, sec
, tz
= [None]*7
318 m
= iso_re
.search(text
)
320 # XXX there's an extra bit of the timezone I'm ignoring here: is
321 # this the right thing to do?
322 yr
, mon
, day
, hr
, min, sec
, tz
, _
= m
.groups()
324 return None # bad format
326 return _str2time(day
, mon
, yr
, hr
, min, sec
, tz
)
330 # XXX Andrew Dalke kindly sent me a similar class in response to my request on
331 # comp.lang.python, which I then proceeded to lose. I wrote this class
332 # instead, but I think he's released his code publicly since, could pinch the
333 # tests from it, at least...
335 """Adds a seek method to a file object.
337 This is only designed for seeking on readonly file-like objects.
339 Wrapped file-like object must have a read method. The readline method is
340 only supported if that method is present on the wrapped object. The
341 readlines method is always supported. xreadlines and iteration are
342 supported only for Python 2.2 and above.
344 Public attribute: wrapped (the wrapped file object).
346 WARNING: All other attributes of the wrapped object (ie. those that are not
347 one of wrapped, read, readline, readlines, xreadlines, __iter__ and next)
348 are passed through unaltered, which may or may not make sense for your
349 particular file object.
# General strategy is to check that cache is full enough, then delegate
# everything to the cache (self.__cache, which is a StringIO.StringIO
# instance).  Seems to be some cStringIO.StringIO problem on 1.5.2 -- I
# get a StringO object, with no readlines method.
# Invariant: the end of the cache is always at the same place as the
# end of the wrapped file:
# self.wrapped.tell() == len(self.__cache.getvalue())
def __init__(self, wrapped):
    """Wrap the file-like object `wrapped` and start with an empty cache."""
    self.wrapped = wrapped
    # Everything read from the wrapped object is accumulated here.
    self.__cache = StringIO()
    # Remember up front whether the wrapped object offers readline().
    self.__have_readline = hasattr(wrapped, "readline")
def __getattr__(self, name):
    """Delegate attributes not found on the wrapper to the wrapped object.

    Only called when normal attribute lookup fails, so attributes set on
    the wrapper itself take precedence.

    Raises AttributeError if the wrapped object lacks the attribute, or if
    `wrapped` itself has not been set on this instance yet.
    """
    if name == "wrapped":
        # Reached only when self.wrapped is missing (e.g. an instance
        # created without running __init__).  Looking it up below would
        # re-enter __getattr__ and recurse without bound.
        raise AttributeError(name)
    return getattr(self.wrapped, name)
368 def seek(self
, offset
, whence
=0):
369 # make sure we have read all data up to the point we are seeking to
370 pos
= self
.__cache
.tell()
371 if whence
== 0: # absolute
372 to_read
= offset
- pos
373 elif whence
== 1: # relative to current position
375 elif whence
== 2: # relative to end of *wrapped* file
376 # since we don't know yet where the end of that file is, we must
379 if to_read
>= 0 or to_read
is None:
381 self
.__cache
.write(self
.wrapped
.read())
383 self
.__cache
.write(self
.wrapped
.read(to_read
))
384 self
.__cache
.seek(pos
)
386 return self
.__cache
.seek(offset
, whence
)
388 def read(self
, size
=-1):
389 pos
= self
.__cache
.tell()
391 self
.__cache
.seek(pos
)
393 end
= len(self
.__cache
.getvalue())
394 available
= end
- pos
396 # enough data already cached?
397 if size
<= available
and size
!= -1:
398 return self
.__cache
.read(size
)
400 # no, so read sufficient data from wrapped file and cache it
401 to_read
= size
- available
402 assert to_read
> 0 or size
== -1
403 self
.__cache
.seek(0, 2)
405 self
.__cache
.write(self
.wrapped
.read())
407 self
.__cache
.write(self
.wrapped
.read(to_read
))
408 self
.__cache
.seek(pos
)
410 return self
.__cache
.read(size
)
412 def readline(self
, size
=-1):
413 if not self
.__have
_readline
:
414 raise NotImplementedError("no readline method on wrapped object")
416 # line we're about to read might not be complete in the cache, so
417 # read another line first
418 pos
= self
.__cache
.tell()
419 self
.__cache
.seek(0, 2)
420 self
.__cache
.write(self
.wrapped
.readline())
421 self
.__cache
.seek(pos
)
423 data
= self
.__cache
.readline()
426 self
.__cache
.seek(pos
+size
)
def readlines(self, sizehint=-1):
    """Return the remaining lines, reading the rest of the wrapped file first.

    All data still unread from the wrapped object is appended to the cache;
    the lines are then read from the cache starting at the current position.
    """
    resume_at = self.__cache.tell()
    # Append everything the wrapped object still has to the end of the cache.
    remainder = self.wrapped.read()
    self.__cache.seek(0, 2)
    self.__cache.write(remainder)
    # Go back to where the caller was before the cache was extended.
    self.__cache.seek(resume_at)
    try:
        lines = self.__cache.readlines(sizehint)
    except TypeError:  # 1.5.2 hack
        lines = self.__cache.readlines()
    return lines
def __iter__(self):
    """Return the wrapper itself as its own iterator."""
    return self
443 line
= self
.readline()
444 if line
== "": raise StopIteration
447 xreadlines
= __iter__
450 return ("<%s at %s whose wrapped object = %s>" %
451 (self
.__class
__.__name
__, `
id(self
)`
, `self
.wrapped`
))
456 self
.readlines
= None
458 if self
.wrapped
: self
.wrapped
.close()