This commit was manufactured by cvs2svn to create tag 'r234c1'.
[python/dscho.git] / Lib / _strptime.py
blob1d05869f0d7216c5e60153de591f838c9fee49ea
1 """Strptime-related classes and functions.
3 CLASSES:
4 LocaleTime -- Discovers and/or stores locale-specific time information
5 TimeRE -- Creates regexes for pattern matching a string of text containing
6 time information as is returned by time.strftime()
8 FUNCTIONS:
9 _getlang -- Figure out what language is being used for the locale
10 strptime -- Calculates the time struct represented by the passed-in string
12 Requires Python 2.2.1 or higher (mainly because of the use of property()).
13 Can be used in Python 2.2 if the following line is added:
14 True = 1; False = 0
15 """
16 import time
17 import locale
18 import calendar
19 from re import compile as re_compile
20 from re import IGNORECASE
21 from datetime import date as datetime_date
23 __author__ = "Brett Cannon"
24 __email__ = "brett@python.org"
26 __all__ = ['strptime']
def _getlang():
    """Return what the locale module reports for the LC_TIME category.

    The result is used as the "language" tag that decides whether cached
    locale-dependent data is still valid.
    """
    category = locale.LC_TIME
    return locale.getlocale(category)
class LocaleTime(object):
    """Stores and handles locale-specific information related to time.

    This is not thread-safe!  Attributes are lazily calculated and no
    precaution is taken to check to see if the locale information has changed
    since the creation of the instance in use.

    ATTRIBUTES (all read-only after instance creation! Instance variables that
    store the values have mangled names):
        f_weekday -- full weekday names (7-item list)
        a_weekday -- abbreviated weekday names (7-item list)
        f_month -- full month names (13-item list; dummy value in [0], which
                    is added by code)
        a_month -- abbreviated month names (13-item list, dummy value in
                    [0], which is added by code)
        am_pm -- AM/PM representation (2-item list)
        LC_date_time -- format string for date/time representation (string)
        LC_date -- format string for date representation (string)
        LC_time -- format string for time representation (string)
        timezone -- timezone names (list).  When passed to the constructor it
                    is the 2 given names plus a trailing '' added by code.
                    When calculated it is "UTC", "GMT", then the entries of
                    time.tzname (both if time.daylight is set, otherwise
                    only the first), then a trailing ''.  The blank item
                    allows for a possible lack of timezone.
        lang -- Language used by instance (the value passed in, otherwise
                    the result of _getlang())
    """

    def __init__(self, f_weekday=None, a_weekday=None, f_month=None,
                 a_month=None, am_pm=None, LC_date_time=None, LC_time=None,
                 LC_date=None, timezone=None, lang=None):
        """Optionally set attributes with passed-in values.

        Anything left as None is calculated lazily on first access.
        Raises TypeError when a passed-in sequence has the wrong length.
        """
        if f_weekday is None:
            self.__f_weekday = None
        elif len(f_weekday) == 7:
            self.__f_weekday = list(f_weekday)
        else:
            raise TypeError("full weekday names must be a 7-item sequence")
        if a_weekday is None:
            self.__a_weekday = None
        elif len(a_weekday) == 7:
            self.__a_weekday = list(a_weekday)
        else:
            raise TypeError(
                "abbreviated weekday names must be a 7-item sequence")
        if f_month is None:
            self.__f_month = None
        elif len(f_month) == 12:
            self.__f_month = self.__pad(f_month, True)
        else:
            raise TypeError("full month names must be a 12-item sequence")
        if a_month is None:
            self.__a_month = None
        elif len(a_month) == 12:
            self.__a_month = self.__pad(a_month, True)
        else:
            raise TypeError(
                "abbreviated month names must be a 12-item sequence")
        if am_pm is None:
            self.__am_pm = None
        elif len(am_pm) == 2:
            # Stored as a list like every other sequence attribute so that
            # consumers which copy and sort the value also work for tuples.
            self.__am_pm = list(am_pm)
        else:
            raise TypeError("AM/PM representation must be a 2-item sequence")
        self.__LC_date_time = LC_date_time
        self.__LC_time = LC_time
        self.__LC_date = LC_date
        self.__timezone = timezone
        if timezone:
            if len(timezone) != 2:
                raise TypeError("timezone names must contain 2 items")
            else:
                self.__timezone = self.__pad(timezone, False)
        if lang:
            self.__lang = lang
        else:
            self.__lang = _getlang()

    def __pad(self, seq, front):
        # Return a list copy of seq with '' added to the front (front is
        # true) or else to the back.
        seq = list(seq)
        if front:
            seq.insert(0, '')
        else:
            seq.append('')
        return seq

    def __set_nothing(self, stuff):
        # Raise TypeError when trying to set an attribute.
        raise TypeError("attribute does not support assignment")

    def __get_f_weekday(self):
        # Fetch self.f_weekday.
        if not self.__f_weekday:
            self.__calc_weekday()
        return self.__f_weekday

    def __get_a_weekday(self):
        # Fetch self.a_weekday.
        if not self.__a_weekday:
            self.__calc_weekday()
        return self.__a_weekday

    f_weekday = property(__get_f_weekday, __set_nothing,
                         doc="Full weekday names")
    a_weekday = property(__get_a_weekday, __set_nothing,
                         doc="Abbreviated weekday names")

    def __get_f_month(self):
        # Fetch self.f_month.
        if not self.__f_month:
            self.__calc_month()
        return self.__f_month

    def __get_a_month(self):
        # Fetch self.a_month.
        if not self.__a_month:
            self.__calc_month()
        return self.__a_month

    f_month = property(__get_f_month, __set_nothing,
                       doc="Full month names (dummy value at index 0)")
    a_month = property(__get_a_month, __set_nothing,
                       doc="Abbreviated month names (dummy value at index 0)")

    def __get_am_pm(self):
        # Fetch self.am_pm.
        if not self.__am_pm:
            self.__calc_am_pm()
        return self.__am_pm

    am_pm = property(__get_am_pm, __set_nothing, doc="AM/PM representation")

    def __get_timezone(self):
        # Fetch self.timezone.
        if not self.__timezone:
            self.__calc_timezone()
        return self.__timezone

    timezone = property(__get_timezone, __set_nothing,
                        doc="Timezone representation (dummy value at the end)")

    def __get_LC_date_time(self):
        # Fetch self.LC_date_time.
        if not self.__LC_date_time:
            self.__calc_date_time()
        return self.__LC_date_time

    def __get_LC_date(self):
        # Fetch self.LC_date.
        if not self.__LC_date:
            self.__calc_date_time()
        return self.__LC_date

    def __get_LC_time(self):
        # Fetch self.LC_time.
        if not self.__LC_time:
            self.__calc_date_time()
        return self.__LC_time

    LC_date_time = property(
        __get_LC_date_time, __set_nothing,
        doc=
        "Format string for locale's date/time representation ('%c' format)")
    LC_date = property(__get_LC_date, __set_nothing,
        doc="Format string for locale's date representation ('%x' format)")
    LC_time = property(__get_LC_time, __set_nothing,
        doc="Format string for locale's time representation ('%X' format)")

    lang = property(lambda self: self.__lang, __set_nothing,
                    doc="Language used for instance")

    def __calc_weekday(self):
        # Set self.__a_weekday and self.__f_weekday using the calendar
        # module.  Values supplied to the constructor are left alone.
        a_weekday = [calendar.day_abbr[i] for i in range(7)]
        f_weekday = [calendar.day_name[i] for i in range(7)]
        if not self.__a_weekday:
            self.__a_weekday = a_weekday
        if not self.__f_weekday:
            self.__f_weekday = f_weekday

    def __calc_month(self):
        # Set self.__f_month and self.__a_month using the calendar module.
        # Values supplied to the constructor are left alone.
        a_month = [calendar.month_abbr[i] for i in range(13)]
        f_month = [calendar.month_name[i] for i in range(13)]
        if not self.__a_month:
            self.__a_month = a_month
        if not self.__f_month:
            self.__f_month = f_month

    def __calc_am_pm(self):
        # Set self.__am_pm by using time.strftime().

        # The magic date (1999,3,17,hour,44,55,2,76,0) is not really that
        # magical; just happened to have used it everywhere else where a
        # static date was needed.
        am_pm = []
        # Hour 1 yields the AM marker, hour 22 the PM marker.  (Written as
        # plain decimal; a leading-zero literal is octal syntax.)
        for hour in (1, 22):
            time_tuple = time.struct_time((1999,3,17,hour,44,55,2,76,0))
            am_pm.append(time.strftime("%p", time_tuple))
        self.__am_pm = am_pm

    def __calc_date_time(self):
        # Set self.__LC_date_time, self.__LC_date, & self.__LC_time by using
        # time.strftime().

        # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of
        # overloaded numbers is minimized.  The order in which searches for
        # values within the format string is very important; it eliminates
        # possible ambiguity for what something represents.
        time_tuple = time.struct_time((1999,3,17,22,44,55,2,76,0))
        date_time = [None, None, None]
        date_time[0] = time.strftime("%c", time_tuple)
        date_time[1] = time.strftime("%x", time_tuple)
        date_time[2] = time.strftime("%X", time_tuple)
        replacement_pairs = (
            ('%', '%%'), (self.f_weekday[2], '%A'),
            (self.f_month[3], '%B'), (self.a_weekday[2], '%a'),
            (self.a_month[3], '%b'), (self.am_pm[1], '%p'),
            (self.timezone[0], '%Z'), (self.timezone[1], '%Z'),
            ('1999', '%Y'), ('99', '%y'), ('22', '%H'),
            ('44', '%M'), ('55', '%S'), ('76', '%j'),
            ('17', '%d'), ('03', '%m'), ('3', '%m'),
            # '3' needed for when no leading zero.
            ('2', '%w'), ('10', '%I'))
        # 1999-01-03 is described here as weekday 6 / day 3 of the year; it
        # is used to tell a %U week number from a %W one.
        week_probe = time.struct_time((1999,1,3,1,1,1,6,3,0))
        for offset, directive in ((0,'%c'), (1,'%x'), (2,'%X')):
            current_format = date_time[offset]
            for old, new in replacement_pairs:
                # Must deal with possible lack of locale info
                # manifesting itself as the empty string (e.g., Swedish's
                # lack of AM/PM info) or a platform returning a tuple of empty
                # strings (e.g., MacOS 9 having timezone as ('','')).
                if old:
                    current_format = current_format.replace(old, new)
            # If the locale's format shows a week number of '00' for the
            # probe date then the week number is Monday-based (%W); otherwise
            # treat it as Sunday-based (%U).  str.find() returns -1 when the
            # text is absent, so it must be compared explicitly rather than
            # used as a truth value.
            if time.strftime(directive, week_probe).find('00') != -1:
                U_W = '%W'
            else:
                U_W = '%U'
            date_time[offset] = current_format.replace('11', U_W)
        if not self.__LC_date_time:
            self.__LC_date_time = date_time[0]
        if not self.__LC_date:
            self.__LC_date = date_time[1]
        if not self.__LC_time:
            self.__LC_time = date_time[2]

    def __calc_timezone(self):
        # Set self.__timezone by using time.tzname.

        # Empty string used for matching when timezone is not used/needed.
        try:
            time.tzset()
        except AttributeError:
            # time.tzset() is not available on this platform.
            pass
        time_zones = ["UTC", "GMT"]
        if time.daylight:
            time_zones.extend(time.tzname)
        else:
            time_zones.append(time.tzname[0])
        self.__timezone = self.__pad(time_zones, 0)
class TimeRE(dict):
    """Handle conversion from format directives to regexes.

    The mapping goes from a directive character (without the '%') to a regex
    string containing a named group for that directive.  Locale-independent
    directives are stored at construction; locale-dependent ones are built
    from self.locale_time on first lookup and then cached in the mapping.
    """

    def __init__(self, locale_time=None):
        """Init inst with non-locale regexes and store LocaleTime object.

        locale_time -- object supplying the locale attributes (f_weekday,
                       a_weekday, f_month, a_month, am_pm, timezone,
                       LC_date_time, LC_date, LC_time); a new LocaleTime()
                       is created when it is omitted.
        """
        #XXX: Does 'Y' need to worry about having less or more than 4 digits?
        base = super(TimeRE, self)
        base.__init__({
            # The " \d" option is to make %c from ANSI C work
            'd': r"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])",
            'H': r"(?P<H>2[0-3]|[0-1]\d|\d)",
            'I': r"(?P<I>1[0-2]|0[1-9]|[1-9])",
            'j': r"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])",
            'm': r"(?P<m>1[0-2]|0[1-9]|[1-9])",
            'M': r"(?P<M>[0-5]\d|\d)",
            'S': r"(?P<S>6[0-1]|[0-5]\d|\d)",
            'U': r"(?P<U>5[0-3]|[0-4]\d|\d)",
            'w': r"(?P<w>[0-6])",
            # W is set below by using 'U'
            'y': r"(?P<y>\d\d)",
            'Y': r"(?P<Y>\d\d\d\d)"})
        # %W accepts the same digits as %U but needs its own group name;
        # otherwise a format using both directives defines group 'U' twice
        # and cannot be compiled.
        base.__setitem__('W', base.__getitem__('U').replace('U', 'W'))
        if locale_time:
            self.locale_time = locale_time
        else:
            self.locale_time = LocaleTime()

    def __getitem__(self, fetch):
        """Try to fetch regex; if it does not exist, construct it.

        Raises KeyError for a directive that is neither stored nor one of the
        locale-dependent directives known here.
        """
        try:
            return super(TimeRE, self).__getitem__(fetch)
        except KeyError:
            constructors = {
                'A': lambda: self.__seqToRE(self.locale_time.f_weekday, fetch),
                'a': lambda: self.__seqToRE(self.locale_time.a_weekday, fetch),
                'B': lambda: self.__seqToRE(self.locale_time.f_month[1:],
                                            fetch),
                'b': lambda: self.__seqToRE(self.locale_time.a_month[1:],
                                            fetch),
                'c': lambda: self.pattern(self.locale_time.LC_date_time),
                'p': lambda: self.__seqToRE(self.locale_time.am_pm, fetch),
                'x': lambda: self.pattern(self.locale_time.LC_date),
                'X': lambda: self.pattern(self.locale_time.LC_time),
                'Z': lambda: self.__seqToRE(self.locale_time.timezone, fetch),
                '%': lambda: '%',
                }
            if fetch in constructors:
                # Cache the constructed regex so it is only built once.
                self[fetch] = constructors[fetch]()
                return self[fetch]
            else:
                raise

    def __seqToRE(self, to_convert, directive):
        """Convert a list to a regex string for matching a directive.

        Returns '' when every value is the empty string (the locale has no
        information for the directive); otherwise returns a named group whose
        alternatives are the values, longest first.
        """
        from re import escape as re_escape

        for value in to_convert:
            if value != '':
                break
        else:
            return ''
        # Longest names first, in case names in the locale only differ by a
        # suffix and the name with the suffix must get the chance to match
        # first.  Decorating with the original position keeps the order of
        # equal-length names and leaves the caller's sequence untouched.
        decorated = [(-len(value), position, value)
                     for position, value in enumerate(to_convert)]
        decorated.sort()
        # Names are literal text, so characters with a regex meaning (such as
        # the '.' of an abbreviation) have to be escaped.
        regex = '|'.join([re_escape(entry[2]) for entry in decorated])
        return '(?P<%s>%s)' % (directive, regex)

    def pattern(self, format):
        """Return re pattern for the format string.

        Need to make sure that any characters that might be interpreted as
        regex syntax is escaped.

        Raises KeyError for an unknown directive and IndexError when the
        format ends with a lone '%'.
        """
        processed_format = ''
        # The sub() call escapes all characters that might be misconstrued
        # as regex syntax.
        regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])")
        format = regex_chars.sub(r"\\\1", format)
        # Any run of whitespace in the format matches any amount of
        # whitespace (including none) in the data.  The replacement template
        # spells the backslash explicitly so it is inserted literally.
        whitespace_replacement = re_compile(r'\s+')
        format = whitespace_replacement.sub(r'\\s*', format)
        while format.find('%') != -1:
            directive_index = format.index('%')+1
            processed_format = "%s%s%s" % (processed_format,
                                           format[:directive_index-1],
                                           self[format[directive_index]])
            format = format[directive_index+1:]
        return "%s%s" % (processed_format, format)

    def compile(self, format):
        """Return a compiled re object for the format string."""
        return re_compile(self.pattern(format), IGNORECASE)
# A single TimeRE instance is shared by every strptime() call; building one
# per call would be wasted work since it is probably the only one ever needed.
_locale_cache = TimeRE()
# Compiled regex objects keyed by format string, kept for the same reason.
_regex_cache = {}
def strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
    """Return a time struct based on the input data and the format string.

    data_string -- the text to parse
    format -- the format, using strftime()-style directives

    Fields that the format does not supply default to 1900-01-01 00:00:00
    with a DST flag of -1; weekday and day of the year are then calculated
    from the date.

    Raises ValueError when data_string does not match format or when text is
    left over after the match.
    """
    global _locale_cache
    global _regex_cache
    # If the language changes, caches are invalidated, so clear them
    if _locale_cache.locale_time.lang != _getlang():
        _locale_cache = TimeRE()
        _regex_cache.clear()
    # Bind the locale information only after the check above so that the
    # name lookups below never use the data of a locale that was just
    # replaced.
    locale_time = _locale_cache.locale_time
    format_regex = _regex_cache.get(format)
    if not format_regex:
        # Limit regex cache size to prevent major bloating of the module;
        # The value 5 is arbitrary
        if len(_regex_cache) > 5:
            _regex_cache.clear()
        format_regex = _locale_cache.compile(format)
        _regex_cache[format] = format_regex
    found = format_regex.match(data_string)
    if not found:
        raise ValueError("time data did not match format:  data=%s  fmt=%s" %
                         (data_string, format))
    if len(data_string) != found.end():
        raise ValueError("unconverted data remains: %s" %
                          data_string[found.end():])
    year = 1900
    month = day = 1
    hour = minute = second = 0
    tz = -1
    # weekday and julian defaulted to -1 so as to signal need to calculate values
    weekday = julian = -1
    found_dict = found.groupdict()
    for group_key in found_dict:
        if group_key == 'y':
            year = int(found_dict['y'])
            # Open Group specification for strptime() states that a %y
            #value in the range of [00, 68] is in the century 2000, while
            #[69,99] is in the century 1900
            if year <= 68:
                year += 2000
            else:
                year += 1900
        elif group_key == 'Y':
            year = int(found_dict['Y'])
        elif group_key == 'm':
            month = int(found_dict['m'])
        elif group_key == 'B':
            month = _insensitiveindex(locale_time.f_month, found_dict['B'])
        elif group_key == 'b':
            month = _insensitiveindex(locale_time.a_month, found_dict['b'])
        elif group_key == 'd':
            day = int(found_dict['d'])
        elif group_key == 'H':
            hour = int(found_dict['H'])
        elif group_key == 'I':
            hour = int(found_dict['I'])
            ampm = found_dict.get('p', '').lower()
            # If there was no AM/PM indicator, we'll treat this like AM
            if ampm in ('', locale_time.am_pm[0].lower()):
                # We're in AM so the hour is correct unless we're
                # looking at 12 midnight.
                # 12 midnight == 12 AM == hour 0
                if hour == 12:
                    hour = 0
            elif ampm == locale_time.am_pm[1].lower():
                # We're in PM so we need to add 12 to the hour unless
                # we're looking at 12 noon.
                # 12 noon == 12 PM == hour 12
                if hour != 12:
                    hour += 12
        elif group_key == 'M':
            minute = int(found_dict['M'])
        elif group_key == 'S':
            second = int(found_dict['S'])
        elif group_key == 'A':
            weekday = _insensitiveindex(locale_time.f_weekday,
                                        found_dict['A'])
        elif group_key == 'a':
            weekday = _insensitiveindex(locale_time.a_weekday,
                                        found_dict['a'])
        elif group_key == 'w':
            # %w counts from Sunday == 0; the result counts from Monday == 0.
            weekday = int(found_dict['w'])
            if weekday == 0:
                weekday = 6
            else:
                weekday -= 1
        elif group_key == 'j':
            julian = int(found_dict['j'])
        elif group_key == 'Z':
            # Since -1 is default value only need to worry about setting tz if
            # it can be something other than -1.
            found_zone = found_dict['Z'].lower()
            if found_zone in ("utc", "gmt"):
                tz = 0
            elif time.tzname[0] == time.tzname[1] and \
               time.daylight:
                continue #Deals with bad locale setup where timezone info is
                         # the same; first found on FreeBSD 4.4.
            elif locale_time.timezone[2].lower() == found_zone:
                tz = 0
            elif time.daylight and \
                locale_time.timezone[3].lower() == found_zone:
                tz = 1

    # Cannot pre-calculate datetime_date() since can change in Julian
    #calculation and thus could have different value for the day of the week
    #calculation
    if julian == -1:
        # Need to add 1 to result since first day of the year is 1, not 0.
        julian = datetime_date(year, month, day).toordinal() - \
                  datetime_date(year, 1, 1).toordinal() + 1
    else:  # Assume that if they bothered to include Julian day it will
           #be accurate
        datetime_result = datetime_date.fromordinal(
            (julian - 1) + datetime_date(year, 1, 1).toordinal())
        year = datetime_result.year
        month = datetime_result.month
        day = datetime_result.day
    if weekday == -1:
        weekday = datetime_date(year, month, day).weekday()
    return time.struct_time((year, month, day,
                             hour, minute, second,
                             weekday, julian, tz))
def _insensitiveindex(lst, findme):
    """Return the index of the first item of lst equal to findme, ignoring case.

    Raises ValueError when no item matches.
    """
    #XXX <bc>: If LocaleTime is not exposed, then consider removing this and
    #          just lowercase when LocaleTime sets its vars and lowercasing
    #          search values.
    wanted = findme.lower()
    position = 0
    for candidate in lst:
        if candidate.lower() == wanted:
            return position
        position += 1
    raise ValueError("value not in list")
538 raise ValueError("value not in list")