append(): Fixing the test for convertability after consultation with
[python/dscho.git] / Lib / _strptime.py
blob5eeb3701ec94e9b33b4a2545c59480f36b1dd215
1 """Strptime-related classes and functions.
3 CLASSES:
4 LocaleTime -- Discovers and/or stores locale-specific time information
5 TimeRE -- Creates regexes for pattern matching a string of text containing
6 time information as is returned by time.strftime()
8 FUNCTIONS:
9 firstjulian -- Calculates the Julian date up to the first of the specified
10 year
11 gregorian -- Calculates the Gregorian date based on the Julian day and
12 year
13 julianday -- Calculates the Julian day since the first of the year based
14 on the Gregorian date
15 dayofweek -- Calculates the day of the week from the Gregorian date.
16 strptime -- Calculates the time struct represented by the passed-in string
18 Requires Python 2.2.1 or higher.
19 Can be used in Python 2.2 if the following line is added:
20 >>> True = 1; False = 0
21 """
22 import time
23 import locale
24 import calendar
25 from re import compile as re_compile
26 from re import IGNORECASE
27 from string import whitespace as whitespace_string
# Module metadata.
__version__ = (2,1,6)
__author__ = "Brett Cannon"
__email__ = "drifty@bigfoot.com"

# Only strptime() is exported by a star-import of this module.
__all__ = ['strptime']

# Type of a compiled regular expression object; strptime() checks its
# ``format`` argument against this to detect a pre-compiled pattern.
RegexpType = type(re_compile(''))
class LocaleTime(object):
    """Stores and handles locale-specific information related to time.

    ATTRIBUTES (all read-only after instance creation! Instance variables that
    store the values have mangled names):
        f_weekday -- full weekday names (7-item list)
        a_weekday -- abbreviated weekday names (7-item list)
        f_month -- full month names (13-item list; dummy value in [0], which
                    is added by code)
        a_month -- abbreviated month names (13-item list, dummy value in
                    [0], which is added by code)
        am_pm -- AM/PM representation (2-item list)
        LC_date_time -- format string for date/time representation (string)
        LC_date -- format string for date representation (string)
        LC_time -- format string for time representation (string)
        timezone -- daylight- and non-daylight-savings timezone representation
                    (3-item list; code tacks on blank item at end for
                    possible lack of timezone such as UTC)
        lang -- Language used by instance (string)

    Every value that is not passed to the constructor is calculated the first
    time the corresponding attribute is read.  Assigning to any attribute
    raises TypeError.
    """

    def __init__(self, f_weekday=None, a_weekday=None, f_month=None,
                 a_month=None, am_pm=None, LC_date_time=None, LC_time=None,
                 LC_date=None, timezone=None, lang=None):
        """Optionally set attributes with passed-in values.

        Sequences are copied into lists.  Month sequences must hold 12 items
        and are padded with '' at the front; a timezone sequence must hold 2
        items and is padded with '' at the back.  TypeError is raised when a
        sequence has the wrong length.
        """
        if f_weekday is None:
            self.__f_weekday = None
        elif len(f_weekday) == 7:
            self.__f_weekday = list(f_weekday)
        else:
            raise TypeError("full weekday names must be a 7-item sequence")
        if a_weekday is None:
            self.__a_weekday = None
        elif len(a_weekday) == 7:
            self.__a_weekday = list(a_weekday)
        else:
            raise TypeError(
                "abbreviated weekday names must be a 7-item sequence")
        if f_month is None:
            self.__f_month = None
        elif len(f_month) == 12:
            self.__f_month = self.__pad(f_month, True)
        else:
            raise TypeError("full month names must be a 12-item sequence")
        if a_month is None:
            self.__a_month = None
        elif len(a_month) == 12:
            self.__a_month = self.__pad(a_month, True)
        else:
            raise TypeError(
                "abbreviated month names must be a 12-item sequence")
        if am_pm is None:
            self.__am_pm = None
        elif len(am_pm) == 2:
            # Copy like every other sequence so that the caller's object is
            # not shared with (and cannot later alter) this instance.
            self.__am_pm = list(am_pm)
        else:
            raise TypeError("AM/PM representation must be a 2-item sequence")
        self.__LC_date_time = LC_date_time
        self.__LC_time = LC_time
        self.__LC_date = LC_date
        self.__timezone = timezone
        if timezone:
            if len(timezone) != 2:
                raise TypeError("timezone names must contain 2 items")
            else:
                self.__timezone = self.__pad(timezone, False)
        self.__lang = lang

    def __pad(self, seq, front):
        # Return a list copy of seq with '' added to the front (front is
        # true) or else to the back.
        seq = list(seq)
        if front:
            seq.insert(0, '')
        else:
            seq.append('')
        return seq

    def __set_nothing(self, stuff):
        # Setter shared by every property; raises TypeError on assignment.
        raise TypeError("attribute does not support assignment")

    def __get_f_weekday(self):
        # Fetch self.f_weekday, calculating it if it has not been set.
        if not self.__f_weekday:
            self.__calc_weekday()
        return self.__f_weekday

    def __get_a_weekday(self):
        # Fetch self.a_weekday, calculating it if it has not been set.
        if not self.__a_weekday:
            self.__calc_weekday()
        return self.__a_weekday

    f_weekday = property(__get_f_weekday, __set_nothing,
                         doc="Full weekday names")
    a_weekday = property(__get_a_weekday, __set_nothing,
                         doc="Abbreviated weekday names")

    def __get_f_month(self):
        # Fetch self.f_month, calculating it if it has not been set.
        if not self.__f_month:
            self.__calc_month()
        return self.__f_month

    def __get_a_month(self):
        # Fetch self.a_month, calculating it if it has not been set.
        if not self.__a_month:
            self.__calc_month()
        return self.__a_month

    f_month = property(__get_f_month, __set_nothing,
                       doc="Full month names (dummy value at index 0)")
    a_month = property(__get_a_month, __set_nothing,
                       doc="Abbreviated month names (dummy value at index 0)")

    def __get_am_pm(self):
        # Fetch self.am_pm, calculating it if it has not been set.
        if not self.__am_pm:
            self.__calc_am_pm()
        return self.__am_pm

    am_pm = property(__get_am_pm, __set_nothing, doc="AM/PM representation")

    def __get_timezone(self):
        # Fetch self.timezone, calculating it if it has not been set.
        if not self.__timezone:
            self.__calc_timezone()
        return self.__timezone

    timezone = property(__get_timezone, __set_nothing,
                        doc="Timezone representation (dummy value at index 2)")

    def __get_LC_date_time(self):
        # Fetch self.LC_date_time, calculating it if it has not been set.
        if not self.__LC_date_time:
            self.__calc_date_time()
        return self.__LC_date_time

    def __get_LC_date(self):
        # Fetch self.LC_date, calculating it if it has not been set.
        if not self.__LC_date:
            self.__calc_date_time()
        return self.__LC_date

    def __get_LC_time(self):
        # Fetch self.LC_time, calculating it if it has not been set.
        if not self.__LC_time:
            self.__calc_date_time()
        return self.__LC_time

    LC_date_time = property(
        __get_LC_date_time, __set_nothing,
        doc=
        "Format string for locale's date/time representation ('%c' format)")
    LC_date = property(__get_LC_date, __set_nothing,
        doc="Format string for locale's date representation ('%x' format)")
    LC_time = property(__get_LC_time, __set_nothing,
        doc="Format string for locale's time representation ('%X' format)")

    def __get_lang(self):
        # Fetch self.lang, calculating it if it has not been set.
        if not self.__lang:
            self.__calc_lang()
        return self.__lang

    lang = property(__get_lang, __set_nothing,
                    doc="Language used for instance")

    def __calc_weekday(self):
        # Set self.__a_weekday and self.__f_weekday using the calendar
        # module.  Values supplied to the constructor are left alone.
        a_weekday = [calendar.day_abbr[i] for i in range(7)]
        f_weekday = [calendar.day_name[i] for i in range(7)]
        if not self.__a_weekday:
            self.__a_weekday = a_weekday
        if not self.__f_weekday:
            self.__f_weekday = f_weekday

    def __calc_month(self):
        # Set self.__f_month and self.__a_month using the calendar module.
        # Values supplied to the constructor are left alone.
        a_month = [calendar.month_abbr[i] for i in range(13)]
        f_month = [calendar.month_name[i] for i in range(13)]
        if not self.__a_month:
            self.__a_month = a_month
        if not self.__f_month:
            self.__f_month = f_month

    def __calc_am_pm(self):
        # Set self.__am_pm by using time.strftime().

        # The magic date (1999,3,17,hour,44,55,2,76,0) is not really that
        # magical; just happened to have used it everywhere else where a
        # static date was needed.
        am_pm = []
        # Hour 1 yields the AM string and hour 22 the PM string.  (Written as
        # 1 rather than the octal literal 01, which Python 3 rejects.)
        for hour in (1, 22):
            time_tuple = time.struct_time((1999,3,17,hour,44,55,2,76,0))
            am_pm.append(time.strftime("%p", time_tuple))
        self.__am_pm = am_pm

    def __calc_date_time(self):
        # Set self.__LC_date_time, self.__LC_date, & self.__LC_time by using
        # time.strftime().

        # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of
        # overloaded numbers is minimized.  The order in which searches for
        # values within the format string is very important; it eliminates
        # possible ambiguity for what something represents.
        time_tuple = time.struct_time((1999,3,17,22,44,55,2,76,0))
        date_time = [time.strftime("%c", time_tuple),
                     time.strftime("%x", time_tuple),
                     time.strftime("%X", time_tuple)]
        # The same substitutions apply to all three formats, so build the
        # table once instead of once per format.
        replacement_pairs = (
            ('%', '%%'), (self.f_weekday[2], '%A'),
            (self.f_month[3], '%B'), (self.a_weekday[2], '%a'),
            (self.a_month[3], '%b'), (self.am_pm[1], '%p'),
            (self.timezone[0], '%Z'), (self.timezone[1], '%Z'),
            ('1999', '%Y'), ('99', '%y'), ('22', '%H'),
            ('44', '%M'), ('55', '%S'), ('76', '%j'),
            ('17', '%d'), ('03', '%m'), ('3', '%m'),
            # '3' needed for when no leading zero.
            ('2', '%w'), ('10', '%I'))
        # Sunday 1999-01-03 is in week 00 when weeks start on Monday (%W) and
        # in week 01 when they start on Sunday (%U).
        week_tuple = time.struct_time((1999,1,3,1,1,1,6,3,0))
        for offset, directive in ((0,'%c'), (1,'%x'), (2,'%X')):
            current_format = date_time[offset]
            for old, new in replacement_pairs:
                # Missing locale info shows up as the empty string (i.e.,
                # Swedish's lack of AM/PM info).  str.replace('', new) would
                # insert ``new`` between every character, so skip it.
                if old:
                    current_format = current_format.replace(old, new)
            # Test for membership; str.find() returns -1 (true) when the
            # text is absent and 0 (false) when it is at the very start.
            if '00' in time.strftime(directive, week_tuple):
                U_W = '%W'
            else:
                U_W = '%U'
            # 1999-03-17 falls in week 11 under both numbering schemes.
            date_time[offset] = current_format.replace('11', U_W)
        if not self.__LC_date_time:
            self.__LC_date_time = date_time[0]
        if not self.__LC_date:
            self.__LC_date = date_time[1]
        if not self.__LC_time:
            self.__LC_time = date_time[2]

    def __calc_timezone(self):
        # Set self.__timezone by using time.tzname.

        # Empty string used for matching when timezone is not used/needed such
        # as with UTC.
        self.__timezone = self.__pad(time.tzname, False)

    def __calc_lang(self):
        # Set self.lang by using locale.getlocale() or
        # locale.getdefaultlocale().
        current_lang = locale.getlocale(locale.LC_TIME)[0]
        if current_lang:
            self.__lang = current_lang
        else:
            self.__lang = locale.getdefaultlocale()[0]
class TimeRE(dict):
    """Handle conversion from format directives to regexes.

    The instance is a dict that maps a directive letter (without the '%') to
    a regex string holding a named group of the same letter.  Regexes for the
    locale-dependent directives are built from self.locale_time the first
    time they are requested and are then stored in the dict.
    """

    # Characters with a special meaning in a regex; they are backslash-escaped
    # in literal text so that they only match themselves.
    __regex_chars = re_compile(r"([\\.^$*+?(){}\[\]|])")

    def __init__(self, locale_time=None):
        """Init inst with non-locale regexes and store LocaleTime object.

        locale_time -- object supplying the locale-specific names and
                       formats; a new LocaleTime is created when omitted.
        """
        super(TimeRE,self).__init__({
            # The " \d" option is to make %c from ANSI C work
            'd': r"(?P<d>3[0-1]|[0-2]\d|\d| \d)",
            'H': r"(?P<H>2[0-3]|[0-1]\d|\d)",
            'I': r"(?P<I>0\d|1[0-2]|\d)",
            'j': r"(?P<j>(?:3[0-5]\d|6[0-6])|[0-2]\d\d|\d)",
            'm': r"(?P<m>0\d|1[0-2]|\d)",
            'M': r"(?P<M>[0-5]\d|\d)",
            'S': r"(?P<S>6[0-1]|[0-5]\d|\d)",
            'U': r"(?P<U>5[0-3]|[0-4]\d|\d)",
            'w': r"(?P<w>[0-6])",
            'W': r"(?P<W>5[0-3]|[0-4]\d|\d)", # Same as U
            'y': r"(?P<y>\d\d)",
            'Y': r"(?P<Y>\d\d\d\d)"})
        # A default of LocaleTime() in the signature would be evaluated once,
        # when the class is defined, and then shared by every instance.
        if locale_time is None:
            locale_time = LocaleTime()
        self.locale_time = locale_time

    def __getitem__(self, fetch):
        """Try to fetch regex; if it does not exist, construct it.

        Raises KeyError for a directive that is neither stored nor
        constructible.
        """
        try:
            return super(TimeRE, self).__getitem__(fetch)
        except KeyError:
            constructors = {
                'A': lambda: self.__seqToRE(self.locale_time.f_weekday, fetch),
                'a': lambda: self.__seqToRE(self.locale_time.a_weekday, fetch),
                'B': lambda: self.__seqToRE(self.locale_time.f_month[1:],
                                            fetch),
                'b': lambda: self.__seqToRE(self.locale_time.a_month[1:],
                                            fetch),
                'c': lambda: self.pattern(self.locale_time.LC_date_time),
                'p': lambda: self.__seqToRE(self.locale_time.am_pm, fetch),
                'x': lambda: self.pattern(self.locale_time.LC_date),
                'X': lambda: self.pattern(self.locale_time.LC_time),
                'Z': lambda: self.__seqToRE(self.locale_time.timezone, fetch),
                '%': lambda: '%',
                }
            if fetch in constructors:
                # Store the result so it is only constructed once.
                self[fetch] = constructors[fetch]()
                return self[fetch]
            else:
                raise

    def __escape(self, text):
        # Backslash-escape every regex metacharacter in text.
        return self.__regex_chars.sub(r"\\\1", text)

    def __seqToRE(self, to_convert, directive):
        """Convert a list to a regex string for matching directive.

        The names are tried longest first, in case for some strange reason
        names in the locale only differ by a suffix and thus want the name
        with the suffix to match first.  Names of equal length keep their
        original order.  to_convert itself is not changed.
        """
        longest_first = sorted(to_convert, key=len, reverse=True)
        # Names such as '+03' or 'a.m.' contain regex metacharacters and
        # must be matched literally.
        alternatives = '|'.join([self.__escape(name)
                                 for name in longest_first])
        return '(?P<%s>%s)' % (directive, alternatives)

    def pattern(self, format):
        """Return re pattern for the format string.

        Literal text in format is matched as-is (regex metacharacters are
        escaped), each whitespace character becomes \\s*, and each directive
        is replaced by its regex.  Raises ValueError when format ends in a
        lone '%' and KeyError for an unknown directive.
        """
        processed_format = ''
        # Escape before the whitespace and directive substitutions so that
        # the regex syntax those insert is left intact.
        format = self.__escape(format)
        for whitespace in whitespace_string:
            format = format.replace(whitespace, r'\s*')
        while '%' in format:
            directive_index = format.index('%')+1
            if directive_index >= len(format):
                raise ValueError("stray % in format")
            processed_format = "%s%s%s" % (processed_format,
                                           format[:directive_index-1],
                                           self[format[directive_index]])
            format = format[directive_index+1:]
        return "%s%s" % (processed_format, format)

    def compile(self, format):
        """Return a compiled re object for the format string.

        The pattern starts with a regex comment naming the language of
        self.locale_time and is compiled with IGNORECASE.
        """
        # The comment is added after translating format so that its
        # parentheses are not escaped by pattern().
        return re_compile("(?#%s)%s" % (self.locale_time.lang,
                                        self.pattern(format)),
                          IGNORECASE)
def strptime(data_string, format="%a %b %d %H:%M:%S %Y"):
    """Return a time struct based on the input data and the format string.

    The format argument may either be a regular expression object compiled by
    strptime(), or a format string.  If False is passed in for data_string
    then the re object calculated for format will be returned.  The re object
    must be used with the same locale as was used to compile the re object.

    Fields that can neither be read from data_string nor calculated from the
    other fields are set to -1 in the returned struct.

    Raises TypeError when a passed-in re object does not mention the current
    locale's language, and ValueError when data_string does not match format.
    """
    locale_time = LocaleTime()
    if isinstance(format, RegexpType):
        # str() so that an undeterminable language (None) is compared the
        # same way TimeRE.compile() wrote it into the pattern.
        if str(locale_time.lang) not in format.pattern:
            raise TypeError("re object not created with same language as "
                            "LocaleTime instance")
        compiled_re = format
    else:
        compiled_re = TimeRE(locale_time).compile(format)
    if data_string is False:
        return compiled_re
    found = compiled_re.match(data_string)
    if not found:
        raise ValueError("time data did not match format")
    year = month = day = hour = minute = second = weekday = julian = tz = -1
    found_dict = found.groupdict()
    for group_key in found_dict:
        if group_key == 'y':
            # Two-digit years are placed in the current century.
            year = int("%s%s" %
                       (time.strftime("%Y")[:-2], found_dict['y']))
        elif group_key == 'Y':
            year = int(found_dict['Y'])
        elif group_key == 'm':
            month = int(found_dict['m'])
        elif group_key == 'B':
            month = _insensitiveindex(locale_time.f_month, found_dict['B'])
        elif group_key == 'b':
            month = _insensitiveindex(locale_time.a_month, found_dict['b'])
        elif group_key == 'd':
            day = int(found_dict['d'])
        elif group_key == 'H':
            # Compared with ==; ``is`` only worked when both strings
            # happened to be the same interned object.
            hour = int(found_dict['H'])
        elif group_key == 'I':
            hour = int(found_dict['I'])
            ampm = found_dict.get('p', '').lower()
            # If there was no AM/PM indicator, we'll treat this like AM
            if ampm in ('', locale_time.am_pm[0].lower()):
                # We're in AM so the hour is correct unless we're
                # looking at 12 midnight.
                # 12 midnight == 12 AM == hour 0
                if hour == 12:
                    hour = 0
            elif ampm == locale_time.am_pm[1].lower():
                # We're in PM so we need to add 12 to the hour unless
                # we're looking at 12 noon.
                # 12 noon == 12 PM == hour 12
                if hour != 12:
                    hour += 12
        elif group_key == 'M':
            minute = int(found_dict['M'])
        elif group_key == 'S':
            second = int(found_dict['S'])
        elif group_key == 'A':
            weekday = _insensitiveindex(locale_time.f_weekday,
                                        found_dict['A'])
        elif group_key == 'a':
            weekday = _insensitiveindex(locale_time.a_weekday,
                                        found_dict['a'])
        elif group_key == 'w':
            # %w counts from Sunday == 0; the time struct counts from
            # Monday == 0.
            weekday = int(found_dict['w'])
            if weekday == 0:
                weekday = 6
            else:
                weekday -= 1
        elif group_key == 'j':
            julian = int(found_dict['j'])
        elif group_key == 'Z':
            found_zone = found_dict['Z'].lower()
            if locale_time.timezone[0].lower() == found_zone:
                tz = 0
            elif locale_time.timezone[1].lower() == found_zone:
                tz = 1
            elif locale_time.timezone[2].lower() == found_zone:
                tz = 0
    #XXX <bc>: If calculating fxns are never exposed to the general
    #          populous then just inline calculations.
    # Fill in whichever of julian day, month/day and weekday can be derived
    # from the fields that were found.
    if julian == -1 and year != -1 and month != -1 and day != -1:
        julian = julianday(year, month, day)
    if (month == -1 or day == -1) and julian != -1 and year != -1:
        year, month, day = gregorian(julian, year)
    if weekday == -1 and year != -1 and month != -1 and day != -1:
        weekday = dayofweek(year, month, day)
    return time.struct_time(
        (year,month,day,hour,minute,second,weekday, julian,tz))
def _insensitiveindex(lst, findme):
    # Return the position of the first item in lst that equals findme when
    # letter case is ignored; raise ValueError when no item does.

    #XXX <bc>: If LocaleTime is not exposed, then consider removing this and
    #          just lowercase when LocaleTime sets its vars and lowercasing
    #          search values.
    wanted = findme.lower()
    position = 0
    for candidate in lst:
        if candidate.lower() == wanted:
            return position
        position += 1
    raise ValueError("value not in list")
def _julian_day_number(year, month, day):
    # Return the Julian Day Number of the given Gregorian date.
    #
    # January and February are counted as months 10 and 11 of the previous
    # year so that the leap day falls at the end of the shifted year.
    a = (14 - month) // 12
    y = year + 4800 - a
    m = month + (12 * a) - 3
    # The leap days are counted exactly.  (146097 * y) // 400 only equals
    # this sum for some years and is a day short for e.g. 2004.
    return (day + (((153 * m) + 2) // 5)
            + (365 * y) + (y // 4) - (y // 100) + (y // 400) - 32045)

def firstjulian(year):
    """Calculate the Julian date up until the first of the year.

    The value returned is the Julian Day Number of January 1 of year.
    """
    return _julian_day_number(year, 1, 1)

def julianday(year, month, day):
    """Calculate the Julian day since the beginning of the year.
    Calculated from the Gregorian date.

    January 1 is day 1.
    """
    return _julian_day_number(year, month, day) - firstjulian(year) + 1

def gregorian(julian, year):
    """Return 3-item list containing Gregorian date based on the Julian day.

    julian is the day of the year (January 1 is day 1).  The list is
    [year, month, day].
    """
    # firstjulian(year) + julian - 1 is the Julian Day Number of the date;
    # adding 32044 to that gives the day count used by the steps below.
    a = 32043 + julian + firstjulian(year)
    b = ((4 * a) + 3) // 146097
    c = a - ((146097 * b) // 4)
    d = ((4 * c) + 3) // 1461
    e = c - ((1461 * d) // 4)
    m = ((5 * e) + 2) // 153
    day = 1 + e - (((153 * m) + 2) // 5)
    month = m + 3 - (12 * (m // 10))
    year = (100 * b) + d - 4800 + (m // 10)
    return [year, month, day]
def dayofweek(year, month, day):
    """Calculate the day of the week (Monday is 0).

    year, month and day give a Gregorian date.  The result runs from 0
    (Monday) to 6 (Sunday).
    """
    # January and February are treated as months 11 and 12 of the previous
    # year.
    a = (14 - month) // 12
    y = year - a
    # Leap days are counted exactly; (97 * y) // 400 is one short for some
    # years (2004, for instance), which shifted the result by a day.
    leap_days = (y // 4) - (y // 100) + (y // 400)
    # The sum modulo 7 numbers the days from Sunday == 0.
    weekday = (day + y + leap_days
               + ((31 * (month + (12 * a) - 2)) // 12)) % 7
    if weekday == 0:
        return 6
    else:
        return weekday-1