1 """Strptime-related classes and functions.
4 LocaleTime -- Discovers and/or stores locale-specific time information
5 TimeRE -- Creates regexes for pattern matching a string of text containing
6 time information as is returned by time.strftime()
firstjulian -- Calculates the Julian date up to the first of the specified
    year
gregorian -- Calculates the Gregorian date based on the Julian day and
    year
julianday -- Calculates the Julian day since the first of the year based
    on the Gregorian date
15 dayofweek -- Calculates the day of the week from the Gregorian date.
16 strptime -- Calculates the time struct represented by the passed-in string
18 Requires Python 2.2.1 or higher.
19 Can be used in Python 2.2 if the following line is added:
20 >>> True = 1; False = 0
25 from re
import compile as re_compile
26 from re
import IGNORECASE
27 from string
import whitespace
as whitespace_string
30 __author__
= "Brett Cannon"
31 __email__
= "drifty@bigfoot.com"
33 __all__
= ['strptime']
35 RegexpType
= type(re_compile(''))
38 class LocaleTime(object):
39 """Stores and handles locale-specific information related to time.
41 ATTRIBUTES (all read-only after instance creation! Instance variables that
42 store the values have mangled names):
43 f_weekday -- full weekday names (7-item list)
44 a_weekday -- abbreviated weekday names (7-item list)
f_month -- full month names (13-item list; dummy value in [0], which
           is added by code)
a_month -- abbreviated month names (13-item list, dummy value in
           [0], which is added by code)
49 am_pm -- AM/PM representation (2-item list)
50 LC_date_time -- format string for date/time representation (string)
51 LC_date -- format string for date representation (string)
52 LC_time -- format string for time representation (string)
53 timezone -- daylight- and non-daylight-savings timezone representation
54 (3-item list; code tacks on blank item at end for
55 possible lack of timezone such as UTC)
56 lang -- Language used by instance (string)
59 def __init__(self
, f_weekday
=None, a_weekday
=None, f_month
=None,
60 a_month
=None, am_pm
=None, LC_date_time
=None, LC_time
=None,
61 LC_date
=None, timezone
=None, lang
=None):
62 """Optionally set attributes with passed-in values."""
64 self
.__f
_weekday
= None
65 elif len(f_weekday
) == 7:
66 self
.__f
_weekday
= list(f_weekday
)
68 raise TypeError("full weekday names must be a 7-item sequence")
70 self
.__a
_weekday
= None
71 elif len(a_weekday
) == 7:
72 self
.__a
_weekday
= list(a_weekday
)
75 "abbreviated weekday names must be a 7-item sequence")
78 elif len(f_month
) == 12:
79 self
.__f
_month
= self
.__pad
(f_month
, True)
81 raise TypeError("full month names must be a 12-item sequence")
84 elif len(a_month
) == 12:
85 self
.__a
_month
= self
.__pad
(a_month
, True)
88 "abbreviated month names must be a 12-item sequence")
94 raise TypeError("AM/PM representation must be a 2-item sequence")
95 self
.__LC
_date
_time
= LC_date_time
96 self
.__LC
_time
= LC_time
97 self
.__LC
_date
= LC_date
98 self
.__timezone
= timezone
100 if len(timezone
) != 2:
101 raise TypeError("timezone names must contain 2 items")
103 self
.__timezone
= self
.__pad
(timezone
, False)
106 def __pad(self
, seq
, front
):
107 # Add '' to seq to either front (is True), else the back.
115 def __set_nothing(self
, stuff
):
116 # Raise TypeError when trying to set an attribute.
117 raise TypeError("attribute does not support assignment")
119 def __get_f_weekday(self
):
120 # Fetch self.f_weekday.
121 if not self
.__f
_weekday
:
122 self
.__calc
_weekday
()
123 return self
.__f
_weekday
125 def __get_a_weekday(self
):
126 # Fetch self.a_weekday.
127 if not self
.__a
_weekday
:
128 self
.__calc
_weekday
()
129 return self
.__a
_weekday
131 f_weekday
= property(__get_f_weekday
, __set_nothing
,
132 doc
="Full weekday names")
133 a_weekday
= property(__get_a_weekday
, __set_nothing
,
134 doc
="Abbreviated weekday names")
136 def __get_f_month(self
):
137 # Fetch self.f_month.
138 if not self
.__f
_month
:
140 return self
.__f
_month
142 def __get_a_month(self
):
143 # Fetch self.a_month.
144 if not self
.__a
_month
:
146 return self
.__a
_month
148 f_month
= property(__get_f_month
, __set_nothing
,
149 doc
="Full month names (dummy value at index 0)")
150 a_month
= property(__get_a_month
, __set_nothing
,
151 doc
="Abbreviated month names (dummy value at index 0)")
153 def __get_am_pm(self
):
159 am_pm
= property(__get_am_pm
, __set_nothing
, doc
="AM/PM representation")
161 def __get_timezone(self
):
162 # Fetch self.timezone.
163 if not self
.__timezone
:
164 self
.__calc
_timezone
()
165 return self
.__timezone
167 timezone
= property(__get_timezone
, __set_nothing
,
168 doc
="Timezone representation (dummy value at index 2)")
170 def __get_LC_date_time(self
):
171 # Fetch self.LC_date_time.
172 if not self
.__LC
_date
_time
:
173 self
.__calc
_date
_time
()
174 return self
.__LC
_date
_time
176 def __get_LC_date(self
):
177 # Fetch self.LC_date.
178 if not self
.__LC
_date
:
179 self
.__calc
_date
_time
()
180 return self
.__LC
_date
182 def __get_LC_time(self
):
183 # Fetch self.LC_time.
184 if not self
.__LC
_time
:
185 self
.__calc
_date
_time
()
186 return self
.__LC
_time
188 LC_date_time
= property(
189 __get_LC_date_time
, __set_nothing
,
191 "Format string for locale's date/time representation ('%c' format)")
192 LC_date
= property(__get_LC_date
, __set_nothing
,
193 doc
="Format string for locale's date representation ('%x' format)")
194 LC_time
= property(__get_LC_time
, __set_nothing
,
195 doc
="Format string for locale's time representation ('%X' format)")
197 def __get_lang(self
):
203 lang
= property(__get_lang
, __set_nothing
,
204 doc
="Language used for instance")
206 def __calc_weekday(self
):
207 # Set self.__a_weekday and self.__f_weekday using the calendar
209 a_weekday
= [calendar
.day_abbr
[i
] for i
in range(7)]
210 f_weekday
= [calendar
.day_name
[i
] for i
in range(7)]
211 if not self
.__a
_weekday
:
212 self
.__a
_weekday
= a_weekday
213 if not self
.__f
_weekday
:
214 self
.__f
_weekday
= f_weekday
216 def __calc_month(self
):
217 # Set self.__f_month and self.__a_month using the calendar module.
218 a_month
= [calendar
.month_abbr
[i
] for i
in range(13)]
219 f_month
= [calendar
.month_name
[i
] for i
in range(13)]
220 if not self
.__a
_month
:
221 self
.__a
_month
= a_month
222 if not self
.__f
_month
:
223 self
.__f
_month
= f_month
225 def __calc_am_pm(self
):
226 # Set self.__am_pm by using time.strftime().
228 # The magic date (1999,3,17,hour,44,55,2,76,0) is not really that
229 # magical; just happened to have used it everywhere else where a
230 # static date was needed.
233 time_tuple
= time
.struct_time((1999,3,17,hour
,44,55,2,76,0))
234 am_pm
.append(time
.strftime("%p", time_tuple
))
237 def __calc_date_time(self
):
238 # Set self.__date_time, self.__date, & self.__time by using
241 # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of
242 # overloaded numbers is minimized. The order in which searches for
243 # values within the format string is very important; it eliminates
244 # possible ambiguity for what something represents.
245 time_tuple
= time
.struct_time((1999,3,17,22,44,55,2,76,0))
246 date_time
= [None, None, None]
247 date_time
[0] = time
.strftime("%c", time_tuple
)
248 date_time
[1] = time
.strftime("%x", time_tuple
)
249 date_time
[2] = time
.strftime("%X", time_tuple
)
250 for offset
,directive
in ((0,'%c'), (1,'%x'), (2,'%X')):
251 current_format
= date_time
[offset
]
253 ('%', '%%'), (self
.f_weekday
[2], '%A'),
254 (self
.f_month
[3], '%B'), (self
.a_weekday
[2], '%a'),
255 (self
.a_month
[3], '%b'), (self
.am_pm
[1], '%p'),
256 (self
.timezone
[0], '%Z'), (self
.timezone
[1], '%Z'),
257 ('1999', '%Y'), ('99', '%y'), ('22', '%H'),
258 ('44', '%M'), ('55', '%S'), ('76', '%j'),
259 ('17', '%d'), ('03', '%m'), ('3', '%m'),
260 # '3' needed for when no leading zero.
261 ('2', '%w'), ('10', '%I')):
263 # Done this way to deal with possible lack of locale info
264 # manifesting itself as the empty string (i.e., Swedish's
265 # lack of AM/PM info).
266 current_format
= current_format
.replace(old
, new
)
269 time_tuple
= time
.struct_time((1999,1,3,1,1,1,6,3,0))
270 if time
.strftime(directive
, time_tuple
).find('00'):
274 date_time
[offset
] = current_format
.replace('11', U_W
)
275 if not self
.__LC
_date
_time
:
276 self
.__LC
_date
_time
= date_time
[0]
277 if not self
.__LC
_date
:
278 self
.__LC
_date
= date_time
[1]
279 if not self
.__LC
_time
:
280 self
.__LC
_time
= date_time
[2]
282 def __calc_timezone(self
):
283 # Set self.__timezone by using time.tzname.
285 # Empty string used for matching when timezone is not used/needed such
287 self
.__timezone
= self
.__pad
(time
.tzname
, 0)
289 def __calc_lang(self
):
290 # Set self.lang by using locale.getlocale() or
291 # locale.getdefaultlocale().
292 current_lang
= locale
.getlocale(locale
.LC_TIME
)[0]
294 self
.__lang
= current_lang
296 self
.__lang
= locale
.getdefaultlocale()[0]
300 """Handle conversion from format directives to regexes."""
302 def __init__(self
, locale_time
=LocaleTime()):
303 """Init inst with non-locale regexes and store LocaleTime object."""
304 super(TimeRE
,self
).__init
__({
305 # The " \d" option is to make %c from ANSI C work
306 'd': r
"(?P<d>3[0-1]|[0-2]\d|\d| \d)",
307 'H': r
"(?P<H>2[0-3]|[0-1]\d|\d)",
308 'I': r
"(?P<I>0\d|1[0-2]|\d)",
309 'j': r
"(?P<j>(?:3[0-5]\d|6[0-6])|[0-2]\d\d|\d)",
310 'm': r
"(?P<m>0\d|1[0-2]|\d)",
311 'M': r
"(?P<M>[0-5]\d|\d)",
312 'S': r
"(?P<S>6[0-1]|[0-5]\d|\d)",
313 'U': r
"(?P<U>5[0-3]|[0-4]\d|\d)",
314 'w': r
"(?P<w>[0-6])",
315 'W': r
"(?P<W>5[0-3]|[0-4]\d|\d)", # Same as U
317 'Y': r
"(?P<Y>\d\d\d\d)"})
318 self
.locale_time
= locale_time
320 def __getitem__(self
, fetch
):
321 """Try to fetch regex; if it does not exist, construct it."""
323 return super(TimeRE
, self
).__getitem
__(fetch
)
326 'A': lambda: self
.__seqToRE
(self
.locale_time
.f_weekday
, fetch
),
327 'a': lambda: self
.__seqToRE
(self
.locale_time
.a_weekday
, fetch
),
328 'B': lambda: self
.__seqToRE
(self
.locale_time
.f_month
[1:],
330 'b': lambda: self
.__seqToRE
(self
.locale_time
.a_month
[1:],
332 'c': lambda: self
.pattern(self
.locale_time
.LC_date_time
),
333 'p': lambda: self
.__seqToRE
(self
.locale_time
.am_pm
, fetch
),
334 'x': lambda: self
.pattern(self
.locale_time
.LC_date
),
335 'X': lambda: self
.pattern(self
.locale_time
.LC_time
),
336 'Z': lambda: self
.__seqToRE
(self
.locale_time
.timezone
, fetch
),
339 if fetch
in constructors
:
340 self
[fetch
] = constructors
[fetch
]()
345 def __seqToRE(self
, to_convert
, directive
):
346 """Convert a list to a regex string for matching directive."""
348 """Sort based on length.
350 Done in case for some strange reason that names in the locale only
351 differ by a suffix and thus want the name with the suffix to match
362 return cmp(b_length
, a_length
)
364 to_convert
= to_convert
[:] # Don't want to change value in-place.
365 to_convert
.sort(sorter
)
366 regex
= '|'.join(to_convert
)
367 regex
= '(?P<%s>%s' % (directive
, regex
)
def pattern(self, format):
    """Build the regex source string that corresponds to a format string.

    Each whitespace character of ``format`` is first relaxed into an
    "optional run of whitespace" regex.  Then every ``%<char>`` directive is
    replaced by the regex found under ``self[<char>]``, while the text
    between directives is carried over untouched.

    Raises IndexError when ``format`` ends with a lone ``%`` (there is no
    directive character to look up).
    """
    # Relax every kind of whitespace so spacing differences still match.
    for blank in whitespace_string:
        format = format.replace(blank, r'\s*')
    regex_so_far = ''
    percent_at = format.find('%')
    while percent_at != -1:
        literal_text = format[:percent_at]
        # The character right after '%' names the directive to expand.
        directive_regex = self[format[percent_at + 1]]
        regex_so_far = "%s%s%s" % (regex_so_far, literal_text,
                                   directive_regex)
        # Resume scanning after the two characters just consumed.
        format = format[percent_at + 2:]
        percent_at = format.find('%')
    # Whatever is left holds no directive; append it verbatim.
    return "%s%s" % (regex_so_far, format)
def compile(self, format):
    """Return a compiled, case-insensitive re object for the format string.

    The pattern is prefixed with a regex comment holding
    ``self.locale_time.lang`` so the language it was built for can later be
    found in the compiled object's ``pattern`` attribute (strptime() checks
    for it when it is handed a precompiled regex).
    """
    lang_tag = "(?#%s)" % (self.locale_time.lang,)
    tagged_format = "%s%s" % (lang_tag, format)
    regex_source = self.pattern(tagged_format)
    return re_compile(regex_source, IGNORECASE)
389 def strptime(data_string
, format
="%a %b %d %H:%M:%S %Y"):
390 """Return a time struct based on the input data and the format string.
392 The format argument may either be a regular expression object compiled by
393 strptime(), or a format string. If False is passed in for data_string
394 then the re object calculated for format will be returned. The re object
395 must be used with the same locale as was used to compile the re object.
397 locale_time
= LocaleTime()
398 if isinstance(format
, RegexpType
):
399 if format
.pattern
.find(locale_time
.lang
) == -1:
400 raise TypeError("re object not created with same language as "
401 "LocaleTime instance")
405 compiled_re
= TimeRE(locale_time
).compile(format
)
406 if data_string
is False:
409 found
= compiled_re
.match(data_string
)
411 raise ValueError("time data did not match format")
412 year
= month
= day
= hour
= minute
= second
= weekday
= julian
= tz
=-1
413 found_dict
= found
.groupdict()
414 for group_key
in found_dict
.iterkeys():
417 (time
.strftime("%Y")[:-2], found_dict
['y']))
418 elif group_key
== 'Y':
419 year
= int(found_dict
['Y'])
420 elif group_key
== 'm':
421 month
= int(found_dict
['m'])
422 elif group_key
== 'B':
423 month
= _insensitiveindex(locale_time
.f_month
, found_dict
['B'])
424 elif group_key
== 'b':
425 month
= _insensitiveindex(locale_time
.a_month
, found_dict
['b'])
426 elif group_key
== 'd':
427 day
= int(found_dict
['d'])
428 elif group_key
is 'H':
429 hour
= int(found_dict
['H'])
430 elif group_key
== 'I':
431 hour
= int(found_dict
['I'])
432 ampm
= found_dict
.get('p', '').lower()
433 # If there was no AM/PM indicator, we'll treat this like AM
434 if ampm
in ('', locale_time
.am_pm
[0].lower()):
435 # We're in AM so the hour is correct unless we're
436 # looking at 12 midnight.
437 # 12 midnight == 12 AM == hour 0
440 elif ampm
== locale_time
.am_pm
[1].lower():
441 # We're in PM so we need to add 12 to the hour unless
442 # we're looking at 12 noon.
443 # 12 noon == 12 PM == hour 12
446 elif group_key
== 'M':
447 minute
= int(found_dict
['M'])
448 elif group_key
== 'S':
449 second
= int(found_dict
['S'])
450 elif group_key
== 'A':
451 weekday
= _insensitiveindex(locale_time
.f_weekday
,
453 elif group_key
== 'a':
454 weekday
= _insensitiveindex(locale_time
.a_weekday
,
456 elif group_key
== 'w':
457 weekday
= int(found_dict
['w'])
462 elif group_key
== 'j':
463 julian
= int(found_dict
['j'])
464 elif group_key
== 'Z':
465 found_zone
= found_dict
['Z'].lower()
466 if locale_time
.timezone
[0].lower() == found_zone
:
468 elif locale_time
.timezone
[1].lower() == found_zone
:
470 elif locale_time
.timezone
[2].lower() == found_zone
:
472 #XXX <bc>: If calculating fxns are never exposed to the general
473 # populous then just inline calculations.
474 if julian
== -1 and year
!= -1 and month
!= -1 and day
!= -1:
475 julian
= julianday(year
, month
, day
)
476 if (month
== -1 or day
== -1) and julian
!= -1 and year
!= -1:
477 year
, month
, day
= gregorian(julian
, year
)
478 if weekday
== -1 and year
!= -1 and month
!= -1 and day
!= -1:
479 weekday
= dayofweek(year
, month
, day
)
480 return time
.struct_time(
481 (year
,month
,day
,hour
,minute
,second
,weekday
, julian
,tz
))
def _insensitiveindex(lst, findme):
    # Perform a case-insensitive index search: return the position of the
    # first item in lst that equals findme once both are lowercased.
    # Raises ValueError("value not in list") when no item matches.

    #XXX <bc>: If LocaleTime is not exposed, then consider removing this and
    #          just lowercase when LocaleTime sets its vars and lowercasing
    #          the values being searched for.
    findme = findme.lower()
    for key, item in enumerate(lst):
        if item.lower() == findme:
            # Success branch: hand the matching position back to the caller.
            return key
    raise ValueError("value not in list")
def firstjulian(year):
    """Calculate the Julian date up until the first of the year.

    Pure integer arithmetic (floor division), e.g. firstjulian(2000) is
    2451545.
    """
    cycle_days = 146097  # == 400 * 365 + 97
    scaled = cycle_days * (year + 4799)
    return scaled // 400 - 31738
def julianday(year, month, day):
    """Calculate the Julian day since the beginning of the year.
    Calculated from the Gregorian date.

    The result is the 1-based day of the year: January 1 gives 1, and
    (1999, 3, 17) gives 76.
    """
    # a is 1 for January/February and 0 for every other month; it moves
    # those two months to the end of the previous year for the formula below.
    a = (14 - month) // 12
    # Absolute Julian date of the given day, minus the Julian date of
    # January 1 of the same year, plus one to make the count 1-based.
    return (day - 32045
            + (((153 * (month + (12 * a) - 3)) + 2) // 5)
            + ((146097 * (year + 4800 - a)) // 400)) - firstjulian(year) + 1
def gregorian(julian, year):
    """Return 3-item list containing Gregorian date based on the Julian day.

    ``julian`` is the day number within ``year``; the result is
    ``[year, month, day]``.  For example gregorian(1, 2000) is [2000, 1, 1].
    """
    # Absolute day count, offset so the integer steps below line up.
    shifted = 32043 + julian + firstjulian(year)
    hundreds = (4 * shifted + 3) // 146097
    remainder = shifted - (146097 * hundreds) // 4
    years_in = (4 * remainder + 3) // 1461
    day_offset = remainder - (1461 * years_in) // 4
    month_idx = (5 * day_offset + 2) // 153
    # month_idx counts from March; 10 and 11 wrap into the following year.
    wrap = month_idx // 10
    day = 1 + day_offset - (153 * month_idx + 2) // 5
    month = month_idx + 3 - 12 * wrap
    year = 100 * hundreds + years_in - 4800 + wrap
    return [year, month, day]
522 def dayofweek(year
, month
, day
):
523 """Calculate the day of the week (Monday is 0)."""
524 a
= (14 - month
) // 12
526 weekday
= (day
+ y
+ ((97 * y
) // 400)
527 + ((31 * (month
+ (12 * a
) -2 )) // 12)) % 7