1 """Strptime-related classes and functions.
4 LocaleTime -- Discovers and/or stores locale-specific time information
5 TimeRE -- Creates regexes for pattern matching a string of text containing
6 time information as is returned by time.strftime()
9 _getlang -- Figure out what language is being used for the locale
10 strptime -- Calculates the time struct represented by the passed-in string
12 Requires Python 2.2.1 or higher (mainly because of the use of property()).
13 Can be used in Python 2.2 if the following line is added:
19 from re
import compile as re_compile
20 from re
import IGNORECASE
21 from datetime
import date
as datetime_date
23 __author__
= "Brett Cannon"
24 __email__
= "brett@python.org"
26 __all__
= ['strptime']
29 # Figure out what the current language is set to.
30 current_lang
= locale
.getlocale(locale
.LC_TIME
)[0]
34 current_lang
= locale
.getdefaultlocale()[0]
40 class LocaleTime(object):
41 """Stores and handles locale-specific information related to time.
43 ATTRIBUTES (all read-only after instance creation! Instance variables that
44 store the values have mangled names):
45 f_weekday -- full weekday names (7-item list)
46 a_weekday -- abbreviated weekday names (7-item list)
47 f_month -- full month names (13-item list; dummy value in [0], which
49 a_month -- abbreviated month names (13-item list, dummy value in
50 [0], which is added by code)
51 am_pm -- AM/PM representation (2-item list)
52 LC_date_time -- format string for date/time representation (string)
53 LC_date -- format string for date representation (string)
54 LC_time -- format string for time representation (string)
55 timezone -- daylight- and non-daylight-savings timezone representation
56 (3-item list; code tacks on blank item at end for
57 possible lack of timezone such as UTC)
58 lang -- Language used by instance (string)
61 def __init__(self
, f_weekday
=None, a_weekday
=None, f_month
=None,
62 a_month
=None, am_pm
=None, LC_date_time
=None, LC_time
=None,
63 LC_date
=None, timezone
=None, lang
=None):
64 """Optionally set attributes with passed-in values."""
66 self
.__f
_weekday
= None
67 elif len(f_weekday
) == 7:
68 self
.__f
_weekday
= list(f_weekday
)
70 raise TypeError("full weekday names must be a 7-item sequence")
72 self
.__a
_weekday
= None
73 elif len(a_weekday
) == 7:
74 self
.__a
_weekday
= list(a_weekday
)
77 "abbreviated weekday names must be a 7-item sequence")
80 elif len(f_month
) == 12:
81 self
.__f
_month
= self
.__pad
(f_month
, True)
83 raise TypeError("full month names must be a 12-item sequence")
86 elif len(a_month
) == 12:
87 self
.__a
_month
= self
.__pad
(a_month
, True)
90 "abbreviated month names must be a 12-item sequence")
96 raise TypeError("AM/PM representation must be a 2-item sequence")
97 self
.__LC
_date
_time
= LC_date_time
98 self
.__LC
_time
= LC_time
99 self
.__LC
_date
= LC_date
100 self
.__timezone
= timezone
102 if len(timezone
) != 2:
103 raise TypeError("timezone names must contain 2 items")
105 self
.__timezone
= self
.__pad
(timezone
, False)
108 def __pad(self
, seq
, front
):
109 # Add '' to seq to either front (is True), else the back.
def __set_nothing(self, stuff):
    """Refuse an assignment; serves as the setter of the read-only properties.

    Always raises TypeError, regardless of the value in `stuff`.
    """
    message = "attribute does not support assignment"
    raise TypeError(message)
def __get_f_weekday(self):
    """Return the full weekday names, computing them first if still unset."""
    if self.__f_weekday:
        return self.__f_weekday
    # Nothing stored yet (None or empty): let __calc_weekday fill it in.
    self.__calc_weekday()
    return self.__f_weekday
def __get_a_weekday(self):
    """Return the abbreviated weekday names, computing them first if unset."""
    if self.__a_weekday:
        return self.__a_weekday
    # Nothing stored yet (None or empty): let __calc_weekday fill it in.
    self.__calc_weekday()
    return self.__a_weekday
# Read-only weekday-name attributes: reads go through the lazy getters,
# writes are rejected by __set_nothing.
f_weekday = property(fget=__get_f_weekday, fset=__set_nothing,
                     doc="Full weekday names")
a_weekday = property(fget=__get_a_weekday, fset=__set_nothing,
                     doc="Abbreviated weekday names")
138 def __get_f_month(self
):
139 # Fetch self.f_month.
140 if not self
.__f
_month
:
142 return self
.__f
_month
144 def __get_a_month(self
):
145 # Fetch self.a_month.
146 if not self
.__a
_month
:
148 return self
.__a
_month
# Read-only month-name attributes: reads go through the getters,
# writes are rejected by __set_nothing.
f_month = property(fget=__get_f_month, fset=__set_nothing,
                   doc="Full month names (dummy value at index 0)")
a_month = property(fget=__get_a_month, fset=__set_nothing,
                   doc="Abbreviated month names (dummy value at index 0)")
155 def __get_am_pm(self
):
# Read-only AM/PM attribute: reads use __get_am_pm, writes raise via
# __set_nothing.
am_pm = property(fget=__get_am_pm, fset=__set_nothing,
                 doc="AM/PM representation")
def __get_timezone(self):
    """Return the timezone names, computing them first if still unset."""
    if self.__timezone:
        return self.__timezone
    # Nothing stored yet (None or empty): let __calc_timezone fill it in.
    self.__calc_timezone()
    return self.__timezone
# Read-only timezone attribute: reads use __get_timezone, writes raise via
# __set_nothing.
timezone = property(fget=__get_timezone, fset=__set_nothing,
                    doc="Timezone representation (dummy value at index 2)")
def __get_LC_date_time(self):
    """Return the date/time format string, deriving it first if unset."""
    if self.__LC_date_time:
        return self.__LC_date_time
    # Nothing stored yet: __calc_date_time fills in the missing formats.
    self.__calc_date_time()
    return self.__LC_date_time
def __get_LC_date(self):
    """Return the date format string, deriving it first if unset."""
    if self.__LC_date:
        return self.__LC_date
    # Nothing stored yet: __calc_date_time fills in the missing formats.
    self.__calc_date_time()
    return self.__LC_date
def __get_LC_time(self):
    """Return the time format string, deriving it first if unset."""
    if self.__LC_time:
        return self.__LC_time
    # Nothing stored yet: __calc_date_time fills in the missing formats.
    self.__calc_date_time()
    return self.__LC_time
190 LC_date_time
= property(
191 __get_LC_date_time
, __set_nothing
,
193 "Format string for locale's date/time representation ('%c' format)")
# Read-only format-string attributes for the locale's date and time
# representations; writes raise via __set_nothing.
LC_date = property(
    fget=__get_LC_date,
    fset=__set_nothing,
    doc="Format string for locale's date representation ('%x' format)")
LC_time = property(
    fget=__get_LC_time,
    fset=__set_nothing,
    doc="Format string for locale's time representation ('%X' format)")
199 def __get_lang(self
):
# Read-only language attribute: reads use __get_lang, writes raise via
# __set_nothing.
lang = property(fget=__get_lang, fset=__set_nothing,
                doc="Language used for instance")
def __calc_weekday(self):
    """Fill in whichever weekday-name lists are still unset.

    Names come from the calendar module's day_abbr and day_name; a list
    that already holds a value is left untouched.
    """
    day_numbers = range(7)
    short_names = [calendar.day_abbr[number] for number in day_numbers]
    long_names = [calendar.day_name[number] for number in day_numbers]
    if not self.__f_weekday:
        self.__f_weekday = long_names
    if not self.__a_weekday:
        self.__a_weekday = short_names
def __calc_month(self):
    """Fill in whichever month-name lists are still unset.

    Names come from the calendar module's month_abbr and month_name,
    indexes 0 through 12; a list that already holds a value is left
    untouched.
    """
    month_numbers = range(13)
    short_names = [calendar.month_abbr[number] for number in month_numbers]
    long_names = [calendar.month_name[number] for number in month_numbers]
    if not self.__f_month:
        self.__f_month = long_names
    if not self.__a_month:
        self.__a_month = short_names
227 def __calc_am_pm(self
):
228 # Set self.__am_pm by using time.strftime().
230 # The magic date (1999,3,17,hour,44,55,2,76,0) is not really that
231 # magical; just happened to have used it everywhere else where a
232 # static date was needed.
235 time_tuple
= time
.struct_time((1999,3,17,hour
,44,55,2,76,0))
236 am_pm
.append(time
.strftime("%p", time_tuple
))
239 def __calc_date_time(self
):
240 # Set self.__LC_date_time, self.__LC_date, & self.__LC_time by using
243 # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of
244 # overloaded numbers is minimized. The order in which searches for
245 # values within the format string is very important; it eliminates
246 # possible ambiguity for what something represents.
247 time_tuple
= time
.struct_time((1999,3,17,22,44,55,2,76,0))
248 date_time
= [None, None, None]
249 date_time
[0] = time
.strftime("%c", time_tuple
)
250 date_time
[1] = time
.strftime("%x", time_tuple
)
251 date_time
[2] = time
.strftime("%X", time_tuple
)
252 for offset
,directive
in ((0,'%c'), (1,'%x'), (2,'%X')):
253 current_format
= date_time
[offset
]
255 ('%', '%%'), (self
.f_weekday
[2], '%A'),
256 (self
.f_month
[3], '%B'), (self
.a_weekday
[2], '%a'),
257 (self
.a_month
[3], '%b'), (self
.am_pm
[1], '%p'),
258 (self
.timezone
[0], '%Z'), (self
.timezone
[1], '%Z'),
259 ('1999', '%Y'), ('99', '%y'), ('22', '%H'),
260 ('44', '%M'), ('55', '%S'), ('76', '%j'),
261 ('17', '%d'), ('03', '%m'), ('3', '%m'),
262 # '3' needed for when no leading zero.
263 ('2', '%w'), ('10', '%I')):
264 # Must deal with possible lack of locale info
265 # manifesting itself as the empty string (e.g., Swedish's
266 # lack of AM/PM info) or a platform returning a tuple of empty
267 # strings (e.g., MacOS 9 having timezone as ('','')).
269 current_format
= current_format
.replace(old
, new
)
270 time_tuple
= time
.struct_time((1999,1,3,1,1,1,6,3,0))
271 if time
.strftime(directive
, time_tuple
).find('00'):
275 date_time
[offset
] = current_format
.replace('11', U_W
)
276 if not self
.__LC
_date
_time
:
277 self
.__LC
_date
_time
= date_time
[0]
278 if not self
.__LC
_date
:
279 self
.__LC
_date
= date_time
[1]
280 if not self
.__LC
_time
:
281 self
.__LC
_time
= date_time
[2]
283 def __calc_timezone(self
):
284 # Set self.__timezone by using time.tzname.
286 # Empty string used for matching when timezone is not used/needed.
289 except AttributeError:
291 time_zones
= ["UTC", "GMT"]
293 time_zones
.extend(time
.tzname
)
295 time_zones
.append(time
.tzname
[0])
296 self
.__timezone
= self
.__pad
(time_zones
, 0)
def __calc_lang(self):
    """Store the value returned by _getlang() as this instance's language."""
    detected = _getlang()
    self.__lang = detected
305 """Handle conversion from format directives to regexes."""
307 def __init__(self
, locale_time
=LocaleTime()):
308 """Init inst with non-locale regexes and store LocaleTime object."""
309 #XXX: Does 'Y' need to worry about having less or more than 4 digits?
310 base
= super(TimeRE
, self
)
312 # The " \d" option is to make %c from ANSI C work
313 'd': r
"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])",
314 'H': r
"(?P<H>2[0-3]|[0-1]\d|\d)",
315 'I': r
"(?P<I>1[0-2]|0[1-9]|[1-9])",
316 'j': r
"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])",
317 'm': r
"(?P<m>1[0-2]|0[1-9]|[1-9])",
318 'M': r
"(?P<M>[0-5]\d|\d)",
319 'S': r
"(?P<S>6[0-1]|[0-5]\d|\d)",
320 'U': r
"(?P<U>5[0-3]|[0-4]\d|\d)",
321 'w': r
"(?P<w>[0-6])",
322 # W is set below by using 'U'
324 'Y': r
"(?P<Y>\d\d\d\d)"})
325 base
.__setitem
__('W', base
.__getitem
__('U'))
326 self
.locale_time
= locale_time
328 def __getitem__(self
, fetch
):
329 """Try to fetch regex; if it does not exist, construct it."""
331 return super(TimeRE
, self
).__getitem
__(fetch
)
334 'A': lambda: self
.__seqToRE
(self
.locale_time
.f_weekday
, fetch
),
335 'a': lambda: self
.__seqToRE
(self
.locale_time
.a_weekday
, fetch
),
336 'B': lambda: self
.__seqToRE
(self
.locale_time
.f_month
[1:],
338 'b': lambda: self
.__seqToRE
(self
.locale_time
.a_month
[1:],
340 'c': lambda: self
.pattern(self
.locale_time
.LC_date_time
),
341 'p': lambda: self
.__seqToRE
(self
.locale_time
.am_pm
, fetch
),
342 'x': lambda: self
.pattern(self
.locale_time
.LC_date
),
343 'X': lambda: self
.pattern(self
.locale_time
.LC_time
),
344 'Z': lambda: self
.__seqToRE
(self
.locale_time
.timezone
, fetch
),
347 if fetch
in constructors
:
348 self
[fetch
] = constructors
[fetch
]()
353 def __seqToRE(self
, to_convert
, directive
):
354 """Convert a list to a regex string for matching a directive."""
356 """Sort based on length.
358 Done in case for some strange reason that names in the locale only
359 differ by a suffix and thus want the name with the suffix to match
370 return cmp(b_length
, a_length
)
372 to_convert
= to_convert
[:] # Don't want to change value in-place.
373 for value
in to_convert
:
378 to_convert
.sort(sorter
)
379 regex
= '|'.join(to_convert
)
380 regex
= '(?P<%s>%s' % (directive
, regex
)
def pattern(self, format):
    """Return re pattern for the format string.

    Every character of `format` that could be read as regex syntax is
    backslash-escaped, each run of whitespace becomes an optional-whitespace
    match, and each '%x' directive is replaced by the regex stored under
    self['x'].

    Raises whatever self[...] raises for an unknown directive, and
    IndexError when `format` ends with a lone '%'.
    """
    processed_format = ''
    # Escape all characters that might be misconstrued as regex syntax.
    # Parentheses are included so that a literal '(' or ')' in the format
    # cannot open or close a group in the generated pattern.
    regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])")
    format = regex_chars.sub(r"\\\1", format)
    whitespace_replacement = re_compile(r'\s+')
    # The replacement is a template, so the backslash must itself be escaped
    # for the output to contain the two characters backslash + 's'.
    format = whitespace_replacement.sub(r'\\s*', format)
    while '%' in format:
        directive_index = format.index('%') + 1
        processed_format = "%s%s%s" % (processed_format,
                                       format[:directive_index - 1],
                                       self[format[directive_index]])
        format = format[directive_index + 1:]
    return "%s%s" % (processed_format, format)
def compile(self, format):
    """Build the regex for `format` via self.pattern() and compile it.

    The returned regex object matches case-insensitively.
    """
    regex_source = self.pattern(format)
    return re_compile(regex_source, IGNORECASE)
# A single TimeRE instance is probably all that is ever needed, so it is
# built once here and reused for performance.
_locale_cache = TimeRE()
# Compiled regex objects are cached for the same reason.
_regex_cache = {}
414 def strptime(data_string
, format
="%a %b %d %H:%M:%S %Y"):
415 """Return a time struct based on the input data and the format string."""
418 locale_time
= _locale_cache
.locale_time
419 # If the language changes, caches are invalidated, so clear them
420 if locale_time
.lang
!= _getlang():
421 _locale_cache
= TimeRE()
423 format_regex
= _regex_cache
.get(format
)
425 # Limit regex cache size to prevent major bloating of the module;
426 # The value 5 is arbitrary
427 if len(_regex_cache
) > 5:
429 format_regex
= _locale_cache
.compile(format
)
430 _regex_cache
[format
] = format_regex
431 found
= format_regex
.match(data_string
)
433 raise ValueError("time data did not match format")
434 if len(data_string
) != found
.end():
435 raise ValueError("unconverted data remains: %s" %
436 data_string
[found
.end():])
439 hour
= minute
= second
= 0
441 # weekday and julian defaulted to -1 so as to signal need to calculate values
442 weekday
= julian
= -1
443 found_dict
= found
.groupdict()
444 for group_key
in found_dict
.iterkeys():
446 year
= int(found_dict
['y'])
447 # Open Group specification for strptime() states that a %y
448 #value in the range of [00, 68] is in the century 2000, while
449 #[69,99] is in the century 1900
454 elif group_key
== 'Y':
455 year
= int(found_dict
['Y'])
456 elif group_key
== 'm':
457 month
= int(found_dict
['m'])
458 elif group_key
== 'B':
459 month
= _insensitiveindex(locale_time
.f_month
, found_dict
['B'])
460 elif group_key
== 'b':
461 month
= _insensitiveindex(locale_time
.a_month
, found_dict
['b'])
462 elif group_key
== 'd':
463 day
= int(found_dict
['d'])
464 elif group_key
is 'H':
465 hour
= int(found_dict
['H'])
466 elif group_key
== 'I':
467 hour
= int(found_dict
['I'])
468 ampm
= found_dict
.get('p', '').lower()
469 # If there was no AM/PM indicator, we'll treat this like AM
470 if ampm
in ('', locale_time
.am_pm
[0].lower()):
471 # We're in AM so the hour is correct unless we're
472 # looking at 12 midnight.
473 # 12 midnight == 12 AM == hour 0
476 elif ampm
== locale_time
.am_pm
[1].lower():
477 # We're in PM so we need to add 12 to the hour unless
478 # we're looking at 12 noon.
479 # 12 noon == 12 PM == hour 12
482 elif group_key
== 'M':
483 minute
= int(found_dict
['M'])
484 elif group_key
== 'S':
485 second
= int(found_dict
['S'])
486 elif group_key
== 'A':
487 weekday
= _insensitiveindex(locale_time
.f_weekday
,
489 elif group_key
== 'a':
490 weekday
= _insensitiveindex(locale_time
.a_weekday
,
492 elif group_key
== 'w':
493 weekday
= int(found_dict
['w'])
498 elif group_key
== 'j':
499 julian
= int(found_dict
['j'])
500 elif group_key
== 'Z':
501 # Since -1 is default value only need to worry about setting tz if
502 # it can be something other than -1.
503 found_zone
= found_dict
['Z'].lower()
504 if locale_time
.timezone
[0] == locale_time
.timezone
[1]:
505 pass #Deals with bad locale setup where timezone info is
506 # the same; first found on FreeBSD 4.4.
507 elif found_zone
in ("utc", "gmt"):
509 elif locale_time
.timezone
[2].lower() == found_zone
:
512 if locale_time
.timezone
[3].lower() == found_zone
:
515 # Cannot pre-calculate datetime_date() since can change in Julian
516 #calculation and thus could have different value for the day of the week
519 # Need to add 1 to result since first day of the year is 1, not 0.
520 julian
= datetime_date(year
, month
, day
).toordinal() - \
521 datetime_date(year
, 1, 1).toordinal() + 1
522 else: # Assume that if they bothered to include Julian day it will
524 datetime_result
= datetime_date
.fromordinal((julian
- 1) + datetime_date(year
, 1, 1).toordinal())
525 year
= datetime_result
.year
526 month
= datetime_result
.month
527 day
= datetime_result
.day
529 weekday
= datetime_date(year
, month
, day
).weekday()
530 return time
.struct_time((year
, month
, day
,
531 hour
, minute
, second
,
532 weekday
, julian
, tz
))
534 def _insensitiveindex(lst
, findme
):
535 # Perform a case-insensitive index search.
537 #XXX <bc>: If LocaleTime is not exposed, then consider removing this and
538 # just lowercase when LocaleTime sets its vars and lowercasing
540 findme
= findme
.lower()
541 for key
,item
in enumerate(lst
):
542 if item
.lower() == findme
:
545 raise ValueError("value not in list")