1 """Strptime-related classes and functions.
4 LocaleTime -- Discovers and/or stores locale-specific time information
5 TimeRE -- Creates regexes for pattern matching a string of text containing
6 time information as is returned by time.strftime()
9 _getlang -- Figure out what language is being used for the locale
10 strptime -- Calculates the time struct represented by the passed-in string
12 Requires Python 2.2.1 or higher (mainly because of the use of property()).
13 Can be used in Python 2.2 if the following line is added:
19 from re
import compile as re_compile
20 from re
import IGNORECASE
21 from datetime
import date
as datetime_date
23 __author__
= "Brett Cannon"
24 __email__
= "brett@python.org"
26 __all__
= ['strptime']
29 # Figure out what the current language is set to.
30 return locale
.getlocale(locale
.LC_TIME
)
32 class LocaleTime(object):
33 """Stores and handles locale-specific information related to time.
35 This is not thread-safe! Attributes are lazily calculated and no
36 precaution is taken to check to see if the locale information has changed
37 since the creation of the instance in use.
39 ATTRIBUTES (all read-only after instance creation! Instance variables that
40 store the values have mangled names):
41 f_weekday -- full weekday names (7-item list)
42 a_weekday -- abbreviated weekday names (7-item list)
43 f_month -- full month names (13-item list; dummy value in [0], which
45 a_month -- abbreviated month names (13-item list, dummy value in
46 [0], which is added by code)
47 am_pm -- AM/PM representation (2-item list)
48 LC_date_time -- format string for date/time representation (string)
49 LC_date -- format string for date representation (string)
50 LC_time -- format string for time representation (string)
51 timezone -- daylight- and non-daylight-savings timezone representation
52 (3-item list; code tacks on blank item at end for
53 possible lack of timezone such as UTC)
54 lang -- Language used by instance (string)
57 def __init__(self
, f_weekday
=None, a_weekday
=None, f_month
=None,
58 a_month
=None, am_pm
=None, LC_date_time
=None, LC_time
=None,
59 LC_date
=None, timezone
=None, lang
=None):
60 """Optionally set attributes with passed-in values."""
62 self
.__f
_weekday
= None
63 elif len(f_weekday
) == 7:
64 self
.__f
_weekday
= list(f_weekday
)
66 raise TypeError("full weekday names must be a 7-item sequence")
68 self
.__a
_weekday
= None
69 elif len(a_weekday
) == 7:
70 self
.__a
_weekday
= list(a_weekday
)
73 "abbreviated weekday names must be a 7-item sequence")
76 elif len(f_month
) == 12:
77 self
.__f
_month
= self
.__pad
(f_month
, True)
79 raise TypeError("full month names must be a 12-item sequence")
82 elif len(a_month
) == 12:
83 self
.__a
_month
= self
.__pad
(a_month
, True)
86 "abbreviated month names must be a 12-item sequence")
92 raise TypeError("AM/PM representation must be a 2-item sequence")
93 self
.__LC
_date
_time
= LC_date_time
94 self
.__LC
_time
= LC_time
95 self
.__LC
_date
= LC_date
96 self
.__timezone
= timezone
98 if len(timezone
) != 2:
99 raise TypeError("timezone names must contain 2 items")
101 self
.__timezone
= self
.__pad
(timezone
, False)
105 self
.__lang
= _getlang()
107 def __pad(self
, seq
, front
):
108 # Add '' to seq to either front (is True), else the back.
116 def __set_nothing(self
, stuff
):
117 # Raise TypeError when trying to set an attribute.
118 raise TypeError("attribute does not support assignment")
120 def __get_f_weekday(self
):
121 # Fetch self.f_weekday.
122 if not self
.__f
_weekday
:
123 self
.__calc
_weekday
()
124 return self
.__f
_weekday
126 def __get_a_weekday(self
):
127 # Fetch self.a_weekday.
128 if not self
.__a
_weekday
:
129 self
.__calc
_weekday
()
130 return self
.__a
_weekday
# Read-only properties: reads go through the lazy getters, writes are
# rejected by __set_nothing.
f_weekday = property(fget=__get_f_weekday,
                     fset=__set_nothing,
                     doc="Full weekday names")
a_weekday = property(fget=__get_a_weekday,
                     fset=__set_nothing,
                     doc="Abbreviated weekday names")
137 def __get_f_month(self
):
138 # Fetch self.f_month.
139 if not self
.__f
_month
:
141 return self
.__f
_month
143 def __get_a_month(self
):
144 # Fetch self.a_month.
145 if not self
.__a
_month
:
147 return self
.__a
_month
# Read-only properties: reads go through the lazy getters, writes are
# rejected by __set_nothing.
f_month = property(fget=__get_f_month,
                   fset=__set_nothing,
                   doc="Full month names (dummy value at index 0)")
a_month = property(fget=__get_a_month,
                   fset=__set_nothing,
                   doc="Abbreviated month names (dummy value at index 0)")
154 def __get_am_pm(self
):
# Read-only property: reads go through __get_am_pm, writes are rejected
# by __set_nothing.
am_pm = property(fget=__get_am_pm,
                 fset=__set_nothing,
                 doc="AM/PM representation")
162 def __get_timezone(self
):
163 # Fetch self.timezone.
164 if not self
.__timezone
:
165 self
.__calc
_timezone
()
166 return self
.__timezone
# Read-only property: reads go through __get_timezone, writes are
# rejected by __set_nothing.
timezone = property(fget=__get_timezone,
                    fset=__set_nothing,
                    doc="Timezone representation (dummy value at index 2)")
171 def __get_LC_date_time(self
):
172 # Fetch self.LC_date_time.
173 if not self
.__LC
_date
_time
:
174 self
.__calc
_date
_time
()
175 return self
.__LC
_date
_time
177 def __get_LC_date(self
):
178 # Fetch self.LC_date.
179 if not self
.__LC
_date
:
180 self
.__calc
_date
_time
()
181 return self
.__LC
_date
183 def __get_LC_time(self
):
184 # Fetch self.LC_time.
185 if not self
.__LC
_time
:
186 self
.__calc
_date
_time
()
187 return self
.__LC
_time
# Read-only properties: reads go through the lazy getters (or, for lang,
# a plain attribute fetch), writes are rejected by __set_nothing.
LC_date_time = property(
    fget=__get_LC_date_time,
    fset=__set_nothing,
    doc="Format string for locale's date/time representation ('%c' format)")
LC_date = property(
    fget=__get_LC_date,
    fset=__set_nothing,
    doc="Format string for locale's date representation ('%x' format)")
LC_time = property(
    fget=__get_LC_time,
    fset=__set_nothing,
    doc="Format string for locale's time representation ('%X' format)")

lang = property(
    fget=lambda self: self.__lang,
    fset=__set_nothing,
    doc="Language used for instance")
201 def __calc_weekday(self
):
202 # Set self.__a_weekday and self.__f_weekday using the calendar
204 a_weekday
= [calendar
.day_abbr
[i
] for i
in range(7)]
205 f_weekday
= [calendar
.day_name
[i
] for i
in range(7)]
206 if not self
.__a
_weekday
:
207 self
.__a
_weekday
= a_weekday
208 if not self
.__f
_weekday
:
209 self
.__f
_weekday
= f_weekday
211 def __calc_month(self
):
212 # Set self.__f_month and self.__a_month using the calendar module.
213 a_month
= [calendar
.month_abbr
[i
] for i
in range(13)]
214 f_month
= [calendar
.month_name
[i
] for i
in range(13)]
215 if not self
.__a
_month
:
216 self
.__a
_month
= a_month
217 if not self
.__f
_month
:
218 self
.__f
_month
= f_month
220 def __calc_am_pm(self
):
221 # Set self.__am_pm by using time.strftime().
223 # The magic date (1999,3,17,hour,44,55,2,76,0) is not really that
224 # magical; just happened to have used it everywhere else where a
225 # static date was needed.
228 time_tuple
= time
.struct_time((1999,3,17,hour
,44,55,2,76,0))
229 am_pm
.append(time
.strftime("%p", time_tuple
))
232 def __calc_date_time(self
):
233 # Set self.__date_time, self.__date, & self.__time by using
236 # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of
237 # overloaded numbers is minimized. The order in which searches for
238 # values within the format string is very important; it eliminates
239 # possible ambiguity for what something represents.
240 time_tuple
= time
.struct_time((1999,3,17,22,44,55,2,76,0))
241 date_time
= [None, None, None]
242 date_time
[0] = time
.strftime("%c", time_tuple
)
243 date_time
[1] = time
.strftime("%x", time_tuple
)
244 date_time
[2] = time
.strftime("%X", time_tuple
)
245 for offset
,directive
in ((0,'%c'), (1,'%x'), (2,'%X')):
246 current_format
= date_time
[offset
]
248 ('%', '%%'), (self
.f_weekday
[2], '%A'),
249 (self
.f_month
[3], '%B'), (self
.a_weekday
[2], '%a'),
250 (self
.a_month
[3], '%b'), (self
.am_pm
[1], '%p'),
251 (self
.timezone
[0], '%Z'), (self
.timezone
[1], '%Z'),
252 ('1999', '%Y'), ('99', '%y'), ('22', '%H'),
253 ('44', '%M'), ('55', '%S'), ('76', '%j'),
254 ('17', '%d'), ('03', '%m'), ('3', '%m'),
255 # '3' needed for when no leading zero.
256 ('2', '%w'), ('10', '%I')):
257 # Must deal with possible lack of locale info
258 # manifesting itself as the empty string (e.g., Swedish's
259 # lack of AM/PM info) or a platform returning a tuple of empty
260 # strings (e.g., MacOS 9 having timezone as ('','')).
262 current_format
= current_format
.replace(old
, new
)
263 time_tuple
= time
.struct_time((1999,1,3,1,1,1,6,3,0))
264 if time
.strftime(directive
, time_tuple
).find('00'):
268 date_time
[offset
] = current_format
.replace('11', U_W
)
269 if not self
.__LC
_date
_time
:
270 self
.__LC
_date
_time
= date_time
[0]
271 if not self
.__LC
_date
:
272 self
.__LC
_date
= date_time
[1]
273 if not self
.__LC
_time
:
274 self
.__LC
_time
= date_time
[2]
276 def __calc_timezone(self
):
277 # Set self.__timezone by using time.tzname.
279 # Empty string used for matching when timezone is not used/needed.
282 except AttributeError:
284 time_zones
= ["UTC", "GMT"]
286 time_zones
.extend(time
.tzname
)
288 time_zones
.append(time
.tzname
[0])
289 self
.__timezone
= self
.__pad
(time_zones
, 0)
293 """Handle conversion from format directives to regexes."""
295 def __init__(self
, locale_time
=None):
296 """Init inst with non-locale regexes and store LocaleTime object."""
297 #XXX: Does 'Y' need to worry about having less or more than 4 digits?
298 base
= super(TimeRE
, self
)
300 # The " \d" option is to make %c from ANSI C work
301 'd': r
"(?P<d>3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])",
302 'H': r
"(?P<H>2[0-3]|[0-1]\d|\d)",
303 'I': r
"(?P<I>1[0-2]|0[1-9]|[1-9])",
304 'j': r
"(?P<j>36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])",
305 'm': r
"(?P<m>1[0-2]|0[1-9]|[1-9])",
306 'M': r
"(?P<M>[0-5]\d|\d)",
307 'S': r
"(?P<S>6[0-1]|[0-5]\d|\d)",
308 'U': r
"(?P<U>5[0-3]|[0-4]\d|\d)",
309 'w': r
"(?P<w>[0-6])",
310 # W is set below by using 'U'
312 'Y': r
"(?P<Y>\d\d\d\d)"})
313 base
.__setitem
__('W', base
.__getitem
__('U'))
315 self
.locale_time
= locale_time
317 self
.locale_time
= LocaleTime()
319 def __getitem__(self
, fetch
):
320 """Try to fetch regex; if it does not exist, construct it."""
322 return super(TimeRE
, self
).__getitem
__(fetch
)
325 'A': lambda: self
.__seqToRE
(self
.locale_time
.f_weekday
, fetch
),
326 'a': lambda: self
.__seqToRE
(self
.locale_time
.a_weekday
, fetch
),
327 'B': lambda: self
.__seqToRE
(self
.locale_time
.f_month
[1:],
329 'b': lambda: self
.__seqToRE
(self
.locale_time
.a_month
[1:],
331 'c': lambda: self
.pattern(self
.locale_time
.LC_date_time
),
332 'p': lambda: self
.__seqToRE
(self
.locale_time
.am_pm
, fetch
),
333 'x': lambda: self
.pattern(self
.locale_time
.LC_date
),
334 'X': lambda: self
.pattern(self
.locale_time
.LC_time
),
335 'Z': lambda: self
.__seqToRE
(self
.locale_time
.timezone
, fetch
),
338 if fetch
in constructors
:
339 self
[fetch
] = constructors
[fetch
]()
344 def __seqToRE(self
, to_convert
, directive
):
345 """Convert a list to a regex string for matching a directive."""
347 """Sort based on length.
349 Done in case for some strange reason that names in the locale only
350 differ by a suffix and thus want the name with the suffix to match
361 return cmp(b_length
, a_length
)
363 to_convert
= to_convert
[:] # Don't want to change value in-place.
364 for value
in to_convert
:
369 to_convert
.sort(sorter
)
370 regex
= '|'.join(to_convert
)
371 regex
= '(?P<%s>%s' % (directive
, regex
)
def pattern(self, format):
    """Return re pattern for the format string.

    Characters in format that could be read as regex syntax are escaped,
    each run of whitespace is relaxed to match any amount of whitespace,
    and every '%X' directive is replaced by the regex stored under key
    'X' on this instance (fetched with self[...]).

    Raises whatever self[...] raises for an unknown directive, and
    IndexError when format ends with a lone '%'.

    """
    # Escape only genuine regex metacharacters.  Plain letters must not be
    # escaped: a backslash followed by an ASCII letter is either a special
    # sequence or an invalid escape for the re module.
    regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])")
    format = regex_chars.sub(r"\\\1", format)
    # The doubled backslash makes the replacement template emit a literal
    # backslash followed by 's*' instead of being parsed as an escape.
    whitespace_replacement = re_compile(r'\s+')
    format = whitespace_replacement.sub(r'\\s*', format)
    pieces = []
    while '%' in format:
        directive_index = format.index('%') + 1
        # Literal text before the directive, then the directive's regex.
        pieces.append(format[:directive_index - 1])
        pieces.append(self[format[directive_index]])
        format = format[directive_index + 1:]
    pieces.append(format)
    return ''.join(pieces)
def compile(self, format):
    """Compile the format string into a case-insensitive re object.

    The regex source is whatever self.pattern(format) returns.
    """
    regex_source = self.pattern(format)
    return re_compile(regex_source, flags=IGNORECASE)
# Module-wide TimeRE instance; one is probably all that is ever needed, so
# it is created once and reused for performance.
_locale_cache = TimeRE()
# Compiled regex objects, cached for the same reason as the TimeRE above.
_regex_cache = {}
405 def strptime(data_string
, format
="%a %b %d %H:%M:%S %Y"):
406 """Return a time struct based on the input data and the format string."""
409 locale_time
= _locale_cache
.locale_time
410 # If the language changes, caches are invalidated, so clear them
411 if locale_time
.lang
!= _getlang():
412 _locale_cache
= TimeRE()
414 format_regex
= _regex_cache
.get(format
)
416 # Limit regex cache size to prevent major bloating of the module;
417 # The value 5 is arbitrary
418 if len(_regex_cache
) > 5:
420 format_regex
= _locale_cache
.compile(format
)
421 _regex_cache
[format
] = format_regex
422 found
= format_regex
.match(data_string
)
424 raise ValueError("time data did not match format: data=%s fmt=%s" %
425 (data_string
, format
))
426 if len(data_string
) != found
.end():
427 raise ValueError("unconverted data remains: %s" %
428 data_string
[found
.end():])
431 hour
= minute
= second
= 0
433 # weekday and julian defaulted to -1 so as to signal need to calculate values
434 weekday
= julian
= -1
435 found_dict
= found
.groupdict()
436 for group_key
in found_dict
.iterkeys():
438 year
= int(found_dict
['y'])
439 # Open Group specification for strptime() states that a %y
440 #value in the range of [00, 68] is in the century 2000, while
441 #[69,99] is in the century 1900
446 elif group_key
== 'Y':
447 year
= int(found_dict
['Y'])
448 elif group_key
== 'm':
449 month
= int(found_dict
['m'])
450 elif group_key
== 'B':
451 month
= _insensitiveindex(locale_time
.f_month
, found_dict
['B'])
452 elif group_key
== 'b':
453 month
= _insensitiveindex(locale_time
.a_month
, found_dict
['b'])
454 elif group_key
== 'd':
455 day
= int(found_dict
['d'])
456 elif group_key
== 'H':
457 hour
= int(found_dict
['H'])
458 elif group_key
== 'I':
459 hour
= int(found_dict
['I'])
460 ampm
= found_dict
.get('p', '').lower()
461 # If there was no AM/PM indicator, we'll treat this like AM
462 if ampm
in ('', locale_time
.am_pm
[0].lower()):
463 # We're in AM so the hour is correct unless we're
464 # looking at 12 midnight.
465 # 12 midnight == 12 AM == hour 0
468 elif ampm
== locale_time
.am_pm
[1].lower():
469 # We're in PM so we need to add 12 to the hour unless
470 # we're looking at 12 noon.
471 # 12 noon == 12 PM == hour 12
474 elif group_key
== 'M':
475 minute
= int(found_dict
['M'])
476 elif group_key
== 'S':
477 second
= int(found_dict
['S'])
478 elif group_key
== 'A':
479 weekday
= _insensitiveindex(locale_time
.f_weekday
,
481 elif group_key
== 'a':
482 weekday
= _insensitiveindex(locale_time
.a_weekday
,
484 elif group_key
== 'w':
485 weekday
= int(found_dict
['w'])
490 elif group_key
== 'j':
491 julian
= int(found_dict
['j'])
492 elif group_key
== 'Z':
493 # Since -1 is default value only need to worry about setting tz if
494 # it can be something other than -1.
495 found_zone
= found_dict
['Z'].lower()
496 if found_zone
in ("utc", "gmt"):
498 elif time
.tzname
[0] == time
.tzname
[1] and \
500 continue #Deals with bad locale setup where timezone info is
501 # the same; first found on FreeBSD 4.4.
502 elif locale_time
.timezone
[2].lower() == found_zone
:
504 elif time
.daylight
and \
505 locale_time
.timezone
[3].lower() == found_zone
:
508 # Cannot pre-calculate datetime_date() since can change in Julian
509 #calculation and thus could have different value for the day of the week
512 # Need to add 1 to result since first day of the year is 1, not 0.
513 julian
= datetime_date(year
, month
, day
).toordinal() - \
514 datetime_date(year
, 1, 1).toordinal() + 1
515 else: # Assume that if they bothered to include Julian day it will
517 datetime_result
= datetime_date
.fromordinal((julian
- 1) + datetime_date(year
, 1, 1).toordinal())
518 year
= datetime_result
.year
519 month
= datetime_result
.month
520 day
= datetime_result
.day
522 weekday
= datetime_date(year
, month
, day
).weekday()
523 return time
.struct_time((year
, month
, day
,
524 hour
, minute
, second
,
525 weekday
, julian
, tz
))
527 def _insensitiveindex(lst
, findme
):
528 # Perform a case-insensitive index search.
530 #XXX <bc>: If LocaleTime is not exposed, then consider removing this and
531 # just lowercase when LocaleTime sets its vars and lowercasing
533 findme
= findme
.lower()
534 for key
,item
in enumerate(lst
):
535 if item
.lower() == findme
:
538 raise ValueError("value not in list")