1 """Strptime-related classes and functions.
4 LocaleTime -- Discovers and/or stores locale-specific time information
5 TimeRE -- Creates regexes for pattern matching a string of text containing
6 time information as is returned by time.strftime()
9 firstjulian -- Calculates the Julian date up to the first of the specified
11 gregorian -- Calculates the Gregorian date based on the Julian day and
13 julianday -- Calculates the Julian day since the first of the year based
15 dayofweek -- Calculates the day of the week from the Gregorian date.
16 strptime -- Calculates the time struct represented by the passed-in string
18 Requires Python 2.2.1 or higher.
19 Can be used in Python 2.2 if the following line is added:
20 >>> True = 1; False = 0
25 from re
import compile as re_compile
26 from re
import IGNORECASE
28 __author__
= "Brett Cannon"
29 __email__
= "drifty@bigfoot.com"
31 __all__
= ['strptime']
33 RegexpType
= type(re_compile(''))
36 # Figure out what the current language is set to.
37 current_lang
= locale
.getlocale(locale
.LC_TIME
)[0]
41 current_lang
= locale
.getdefaultlocale()[0]
47 class LocaleTime(object):
48 """Stores and handles locale-specific information related to time.
50 ATTRIBUTES (all read-only after instance creation! Instance variables that
51 store the values have mangled names):
52 f_weekday -- full weekday names (7-item list)
53 a_weekday -- abbreviated weekday names (7-item list)
f_month -- full month names (13-item list; dummy value in [0], which
           is added by code)
a_month -- abbreviated month names (13-item list, dummy value in
           [0], which is added by code)
58 am_pm -- AM/PM representation (2-item list)
59 LC_date_time -- format string for date/time representation (string)
60 LC_date -- format string for date representation (string)
61 LC_time -- format string for time representation (string)
62 timezone -- daylight- and non-daylight-savings timezone representation
63 (3-item list; code tacks on blank item at end for
64 possible lack of timezone such as UTC)
65 lang -- Language used by instance (string)
68 def __init__(self
, f_weekday
=None, a_weekday
=None, f_month
=None,
69 a_month
=None, am_pm
=None, LC_date_time
=None, LC_time
=None,
70 LC_date
=None, timezone
=None, lang
=None):
71 """Optionally set attributes with passed-in values."""
73 self
.__f
_weekday
= None
74 elif len(f_weekday
) == 7:
75 self
.__f
_weekday
= list(f_weekday
)
77 raise TypeError("full weekday names must be a 7-item sequence")
79 self
.__a
_weekday
= None
80 elif len(a_weekday
) == 7:
81 self
.__a
_weekday
= list(a_weekday
)
84 "abbreviated weekday names must be a 7-item sequence")
87 elif len(f_month
) == 12:
88 self
.__f
_month
= self
.__pad
(f_month
, True)
90 raise TypeError("full month names must be a 12-item sequence")
93 elif len(a_month
) == 12:
94 self
.__a
_month
= self
.__pad
(a_month
, True)
97 "abbreviated month names must be a 12-item sequence")
100 elif len(am_pm
) == 2:
103 raise TypeError("AM/PM representation must be a 2-item sequence")
104 self
.__LC
_date
_time
= LC_date_time
105 self
.__LC
_time
= LC_time
106 self
.__LC
_date
= LC_date
107 self
.__timezone
= timezone
109 if len(timezone
) != 2:
110 raise TypeError("timezone names must contain 2 items")
112 self
.__timezone
= self
.__pad
(timezone
, False)
115 def __pad(self
, seq
, front
):
116 # Add '' to seq to either front (is True), else the back.
124 def __set_nothing(self
, stuff
):
125 # Raise TypeError when trying to set an attribute.
126 raise TypeError("attribute does not support assignment")
128 def __get_f_weekday(self
):
129 # Fetch self.f_weekday.
130 if not self
.__f
_weekday
:
131 self
.__calc
_weekday
()
132 return self
.__f
_weekday
134 def __get_a_weekday(self
):
135 # Fetch self.a_weekday.
136 if not self
.__a
_weekday
:
137 self
.__calc
_weekday
()
138 return self
.__a
_weekday
140 f_weekday
= property(__get_f_weekday
, __set_nothing
,
141 doc
="Full weekday names")
142 a_weekday
= property(__get_a_weekday
, __set_nothing
,
143 doc
="Abbreviated weekday names")
145 def __get_f_month(self
):
146 # Fetch self.f_month.
147 if not self
.__f
_month
:
149 return self
.__f
_month
151 def __get_a_month(self
):
152 # Fetch self.a_month.
153 if not self
.__a
_month
:
155 return self
.__a
_month
157 f_month
= property(__get_f_month
, __set_nothing
,
158 doc
="Full month names (dummy value at index 0)")
159 a_month
= property(__get_a_month
, __set_nothing
,
160 doc
="Abbreviated month names (dummy value at index 0)")
162 def __get_am_pm(self
):
168 am_pm
= property(__get_am_pm
, __set_nothing
, doc
="AM/PM representation")
170 def __get_timezone(self
):
171 # Fetch self.timezone.
172 if not self
.__timezone
:
173 self
.__calc
_timezone
()
174 return self
.__timezone
176 timezone
= property(__get_timezone
, __set_nothing
,
177 doc
="Timezone representation (dummy value at index 2)")
179 def __get_LC_date_time(self
):
180 # Fetch self.LC_date_time.
181 if not self
.__LC
_date
_time
:
182 self
.__calc
_date
_time
()
183 return self
.__LC
_date
_time
185 def __get_LC_date(self
):
186 # Fetch self.LC_date.
187 if not self
.__LC
_date
:
188 self
.__calc
_date
_time
()
189 return self
.__LC
_date
191 def __get_LC_time(self
):
192 # Fetch self.LC_time.
193 if not self
.__LC
_time
:
194 self
.__calc
_date
_time
()
195 return self
.__LC
_time
197 LC_date_time
= property(
198 __get_LC_date_time
, __set_nothing
,
200 "Format string for locale's date/time representation ('%c' format)")
201 LC_date
= property(__get_LC_date
, __set_nothing
,
202 doc
="Format string for locale's date representation ('%x' format)")
203 LC_time
= property(__get_LC_time
, __set_nothing
,
204 doc
="Format string for locale's time representation ('%X' format)")
206 def __get_lang(self
):
212 lang
= property(__get_lang
, __set_nothing
,
213 doc
="Language used for instance")
215 def __calc_weekday(self
):
216 # Set self.__a_weekday and self.__f_weekday using the calendar
218 a_weekday
= [calendar
.day_abbr
[i
] for i
in range(7)]
219 f_weekday
= [calendar
.day_name
[i
] for i
in range(7)]
220 if not self
.__a
_weekday
:
221 self
.__a
_weekday
= a_weekday
222 if not self
.__f
_weekday
:
223 self
.__f
_weekday
= f_weekday
225 def __calc_month(self
):
226 # Set self.__f_month and self.__a_month using the calendar module.
227 a_month
= [calendar
.month_abbr
[i
] for i
in range(13)]
228 f_month
= [calendar
.month_name
[i
] for i
in range(13)]
229 if not self
.__a
_month
:
230 self
.__a
_month
= a_month
231 if not self
.__f
_month
:
232 self
.__f
_month
= f_month
234 def __calc_am_pm(self
):
235 # Set self.__am_pm by using time.strftime().
237 # The magic date (1999,3,17,hour,44,55,2,76,0) is not really that
238 # magical; just happened to have used it everywhere else where a
239 # static date was needed.
242 time_tuple
= time
.struct_time((1999,3,17,hour
,44,55,2,76,0))
243 am_pm
.append(time
.strftime("%p", time_tuple
))
246 def __calc_date_time(self
):
247 # Set self.__date_time, self.__date, & self.__time by using
250 # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of
251 # overloaded numbers is minimized. The order in which searches for
252 # values within the format string is very important; it eliminates
253 # possible ambiguity for what something represents.
254 time_tuple
= time
.struct_time((1999,3,17,22,44,55,2,76,0))
255 date_time
= [None, None, None]
256 date_time
[0] = time
.strftime("%c", time_tuple
)
257 date_time
[1] = time
.strftime("%x", time_tuple
)
258 date_time
[2] = time
.strftime("%X", time_tuple
)
259 for offset
,directive
in ((0,'%c'), (1,'%x'), (2,'%X')):
260 current_format
= date_time
[offset
]
262 ('%', '%%'), (self
.f_weekday
[2], '%A'),
263 (self
.f_month
[3], '%B'), (self
.a_weekday
[2], '%a'),
264 (self
.a_month
[3], '%b'), (self
.am_pm
[1], '%p'),
265 (self
.timezone
[0], '%Z'), (self
.timezone
[1], '%Z'),
266 ('1999', '%Y'), ('99', '%y'), ('22', '%H'),
267 ('44', '%M'), ('55', '%S'), ('76', '%j'),
268 ('17', '%d'), ('03', '%m'), ('3', '%m'),
269 # '3' needed for when no leading zero.
270 ('2', '%w'), ('10', '%I')):
271 # Must deal with possible lack of locale info
272 # manifesting itself as the empty string (e.g., Swedish's
273 # lack of AM/PM info) or a platform returning a tuple of empty
274 # strings (e.g., MacOS 9 having timezone as ('','')).
276 current_format
= current_format
.replace(old
, new
)
277 time_tuple
= time
.struct_time((1999,1,3,1,1,1,6,3,0))
278 if time
.strftime(directive
, time_tuple
).find('00'):
282 date_time
[offset
] = current_format
.replace('11', U_W
)
283 if not self
.__LC
_date
_time
:
284 self
.__LC
_date
_time
= date_time
[0]
285 if not self
.__LC
_date
:
286 self
.__LC
_date
= date_time
[1]
287 if not self
.__LC
_time
:
288 self
.__LC
_time
= date_time
[2]
290 def __calc_timezone(self
):
291 # Set self.__timezone by using time.tzname.
293 # Empty string used for matching when timezone is not used/needed such
295 self
.__timezone
= self
.__pad
(time
.tzname
, 0)
def __calc_lang(self):
    # Record the current language, as reported by the module-level
    # _getlang() helper, in self.__lang.
    detected = _getlang()
    self.__lang = detected
304 """Handle conversion from format directives to regexes."""
306 def __init__(self
, locale_time
=LocaleTime()):
307 """Init inst with non-locale regexes and store LocaleTime object."""
308 #XXX: Does 'Y' need to worry about having less or more than 4 digits?
309 base
= super(TimeRE
, self
)
311 # The " \d" option is to make %c from ANSI C work
312 'd': r
"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])",
313 'H': r
"(?P<H>2[0-3]|[0-1]\d|\d)",
314 'I': r
"(?P<I>1[0-2]|0[1-9]|[1-9])",
315 'j': r
"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])",
316 'm': r
"(?P<m>1[0-2]|0[1-9]|[1-9])",
317 'M': r
"(?P<M>[0-5]\d|\d)",
318 'S': r
"(?P<S>6[0-1]|[0-5]\d|\d)",
319 'U': r
"(?P<U>5[0-3]|[0-4]\d|\d)",
320 'w': r
"(?P<w>[0-6])",
321 # W is set below by using 'U'
323 'Y': r
"(?P<Y>\d\d\d\d)"})
324 base
.__setitem
__('W', base
.__getitem
__('U'))
325 self
.locale_time
= locale_time
327 def __getitem__(self
, fetch
):
328 """Try to fetch regex; if it does not exist, construct it."""
330 return super(TimeRE
, self
).__getitem
__(fetch
)
333 'A': lambda: self
.__seqToRE
(self
.locale_time
.f_weekday
, fetch
),
334 'a': lambda: self
.__seqToRE
(self
.locale_time
.a_weekday
, fetch
),
335 'B': lambda: self
.__seqToRE
(self
.locale_time
.f_month
[1:],
337 'b': lambda: self
.__seqToRE
(self
.locale_time
.a_month
[1:],
339 'c': lambda: self
.pattern(self
.locale_time
.LC_date_time
),
340 'p': lambda: self
.__seqToRE
(self
.locale_time
.am_pm
, fetch
),
341 'x': lambda: self
.pattern(self
.locale_time
.LC_date
),
342 'X': lambda: self
.pattern(self
.locale_time
.LC_time
),
343 'Z': lambda: self
.__seqToRE
(self
.locale_time
.timezone
, fetch
),
346 if fetch
in constructors
:
347 self
[fetch
] = constructors
[fetch
]()
352 def __seqToRE(self
, to_convert
, directive
):
353 """Convert a list to a regex string for matching a directive."""
355 """Sort based on length.
357 Done in case for some strange reason that names in the locale only
358 differ by a suffix and thus want the name with the suffix to match
369 return cmp(b_length
, a_length
)
371 to_convert
= to_convert
[:] # Don't want to change value in-place.
372 for value
in to_convert
:
377 to_convert
.sort(sorter
)
378 regex
= '|'.join(to_convert
)
379 regex
= '(?P<%s>%s' % (directive
, regex
)
def pattern(self, format):
    """Return re pattern for the format string.

    Each ``%x`` directive in ``format`` is replaced by the regex stored
    under key ``x`` in this mapping; every run of whitespace becomes
    ``\\s*``; all remaining text is matched literally.

    A format ending in a lone ``%`` raises IndexError, and an unknown
    directive raises whatever the mapping lookup raises.
    """
    processed_format = ''
    # Escape characters that would otherwise be read as regex syntax.
    # re.escape() cannot be used because it would also mangle the '%'
    # directives that still have to be recognised below.
    regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])")
    format = regex_chars.sub(r"\\\1", format)
    # The replacement template needs a doubled backslash: re.sub() treats
    # a single backslash followed by a letter as an escape sequence and
    # rejects unknown ones.
    whitespace_replacement = re_compile(r'\s+')
    format = whitespace_replacement.sub(r'\\s*', format)
    while '%' in format:
        directive_index = format.index('%') + 1
        processed_format = "%s%s%s" % (processed_format,
                                       format[:directive_index - 1],
                                       self[format[directive_index]])
        format = format[directive_index + 1:]
    return "%s%s" % (processed_format, format)
def compile(self, format):
    """Turn the format string into a compiled, case-insensitive regex.

    The regex text comes from self.pattern(format).
    """
    regex_source = self.pattern(format)
    return re_compile(regex_source, IGNORECASE)
# One TimeRE instance is built up front and reused by strptime() rather than
# being recreated for every call.
_locale_cache = TimeRE()
# Compiled regex objects, stored by strptime() under the format string they
# were built from; kept for the same performance reason as the TimeRE above.
_regex_cache = {}
404 def strptime(data_string
, format
="%a %b %d %H:%M:%S %Y"):
405 """Return a time struct based on the input data and the format string."""
408 locale_time
= _locale_cache
.locale_time
409 # If the language changes, caches are invalidated, so clear them
410 if locale_time
.lang
!= _getlang():
411 _locale_cache
= TimeRE()
413 format_regex
= _regex_cache
.get(format
)
415 # Limit regex cache size to prevent major bloating of the module;
416 # The value 5 is arbitrary
417 if len(_regex_cache
) > 5:
419 format_regex
= _locale_cache
.compile(format
)
420 _regex_cache
[format
] = format_regex
421 found
= format_regex
.match(data_string
)
423 raise ValueError("time data did not match format")
426 hour
= minute
= second
= 0
428 # Defaulted to -1 so as to signal using functions to calc values
429 weekday
= julian
= -1
430 found_dict
= found
.groupdict()
431 for group_key
in found_dict
.iterkeys():
433 year
= int(found_dict
['y'])
434 # Open Group specification for strptime() states that a %y
435 #value in the range of [00, 68] is in the century 2000, while
436 #[69,99] is in the century 1900
441 elif group_key
== 'Y':
442 year
= int(found_dict
['Y'])
443 elif group_key
== 'm':
444 month
= int(found_dict
['m'])
445 elif group_key
== 'B':
446 month
= _insensitiveindex(locale_time
.f_month
, found_dict
['B'])
447 elif group_key
== 'b':
448 month
= _insensitiveindex(locale_time
.a_month
, found_dict
['b'])
449 elif group_key
== 'd':
450 day
= int(found_dict
['d'])
451 elif group_key
is 'H':
452 hour
= int(found_dict
['H'])
453 elif group_key
== 'I':
454 hour
= int(found_dict
['I'])
455 ampm
= found_dict
.get('p', '').lower()
456 # If there was no AM/PM indicator, we'll treat this like AM
457 if ampm
in ('', locale_time
.am_pm
[0].lower()):
458 # We're in AM so the hour is correct unless we're
459 # looking at 12 midnight.
460 # 12 midnight == 12 AM == hour 0
463 elif ampm
== locale_time
.am_pm
[1].lower():
464 # We're in PM so we need to add 12 to the hour unless
465 # we're looking at 12 noon.
466 # 12 noon == 12 PM == hour 12
469 elif group_key
== 'M':
470 minute
= int(found_dict
['M'])
471 elif group_key
== 'S':
472 second
= int(found_dict
['S'])
473 elif group_key
== 'A':
474 weekday
= _insensitiveindex(locale_time
.f_weekday
,
476 elif group_key
== 'a':
477 weekday
= _insensitiveindex(locale_time
.a_weekday
,
479 elif group_key
== 'w':
480 weekday
= int(found_dict
['w'])
485 elif group_key
== 'j':
486 julian
= int(found_dict
['j'])
487 elif group_key
== 'Z':
488 found_zone
= found_dict
['Z'].lower()
489 if locale_time
.timezone
[0] == locale_time
.timezone
[1]:
490 pass #Deals with bad locale setup where timezone info is
491 # the same; first found on FreeBSD 4.4.
492 elif locale_time
.timezone
[0].lower() == found_zone
:
494 elif locale_time
.timezone
[1].lower() == found_zone
:
496 elif locale_time
.timezone
[2].lower() == found_zone
:
498 #XXX <bc>: If calculating fxns are never exposed to the general
499 #populous then just inline calculations. Also might be able to use
500 #``datetime`` and the methods it provides.
502 julian
= julianday(year
, month
, day
)
503 else: # Assuming that if they bothered to include Julian day it will
505 year
, month
, day
= gregorian(julian
, year
)
507 weekday
= dayofweek(year
, month
, day
)
508 return time
.struct_time((year
, month
, day
,
509 hour
, minute
, second
,
510 weekday
, julian
, tz
))
512 def _insensitiveindex(lst
, findme
):
513 # Perform a case-insensitive index search.
515 #XXX <bc>: If LocaleTime is not exposed, then consider removing this and
516 # just lowercase when LocaleTime sets its vars and lowercasing
518 findme
= findme
.lower()
519 for key
,item
in enumerate(lst
):
520 if item
.lower() == findme
:
523 raise ValueError("value not in list")
def firstjulian(year):
    """Return the Julian date reached at the first day of ``year``.

    For example, firstjulian(2000) is 2451545.
    """
    shifted_year = year + 4799
    elapsed_days = (146097 * shifted_year) // 400
    return elapsed_days - 31738
529 def julianday(year
, month
, day
):
530 """Calculate the Julian day since the beginning of the year.
531 Calculated from the Gregorian date.
533 a
= (14 - month
) // 12
535 + (((153 * (month
+ (12 * a
) - 3)) + 2) // 5)
536 + ((146097 * (year
+ 4800 - a
)) // 400)) - firstjulian(year
) + 1
def gregorian(julian, year):
    """Convert a day-of-year into a Gregorian calendar date.

    julian -- day number counted within ``year`` (1 is January 1st)
    year -- year that ``julian`` is counted in

    Returns the 3-item list [year, month, day].
    """
    # Work from an absolute day count so the integer arithmetic below can
    # peel off 146097-day and 1461-day cycles.
    absolute = 32043 + julian + firstjulian(year)
    century = (4 * absolute + 3) // 146097
    within_century = absolute - (146097 * century) // 4
    year_of_century = (4 * within_century + 3) // 1461
    within_year = within_century - (1461 * year_of_century) // 4
    shifted_month = (5 * within_year + 2) // 153
    # 1 when the shifted month index wraps into the following calendar
    # year, 0 otherwise.
    carry = shifted_month // 10
    return [
        100 * century + year_of_century - 4800 + carry,
        shifted_month + 3 - 12 * carry,
        1 + within_year - (153 * shifted_month + 2) // 5,
    ]
551 def dayofweek(year
, month
, day
):
552 """Calculate the day of the week (Monday is 0)."""
553 a
= (14 - month
) // 12
555 weekday
= (day
+ y
+ ((97 * y
) // 400)
556 + ((31 * (month
+ (12 * a
) -2 )) // 12)) % 7