2 # This file is part of my.gpodder.org.
4 # my.gpodder.org is free software: you can redistribute it and/or modify it
5 # under the terms of the GNU Affero General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or (at your
7 # option) any later version.
9 # my.gpodder.org is distributed in the hope that it will be useful, but
10 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
12 # License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with my.gpodder.org. If not, see <http://www.gnu.org/licenses/>.
20 from htmlentitydefs
import entitydefs
def parse_time(value):
    """
    Convert a time specification into a number of seconds.

    ``value`` may be an int (returned unchanged), a string in
    ``H:MM:SS`` or ``MM:SS`` form, or a string holding a plain
    number of seconds.

    Raises ValueError if ``value`` is None, an empty string, or a
    string that matches none of the accepted forms.

    >>> parse_time(10)
    10

    >>> parse_time('05:10') #5*60+10
    310

    >>> parse_time('1:05:10') #60*60+5*60+10
    3910
    """
    if value is None:
        raise ValueError('None value in parse_time')

    if isinstance(value, int):
        # Don't need to parse already-converted time value
        return value

    if value == '':
        raise ValueError('Empty string in parse_time')

    # Try the most specific layout first; strptime raises ValueError
    # when the string does not match the layout, in which case the
    # next one is tried.
    for time_format in ('%H:%M:%S', '%M:%S'):
        try:
            t = time.strptime(value, time_format)
        except ValueError:
            continue
        return t.tm_hour * 60*60 + t.tm_min * 60 + t.tm_sec

    # Last resort: a plain number of seconds given as a string.
    # int() raises ValueError itself for anything that is not numeric.
    return int(value)
54 # taken from gpodder.util
55 def remove_html_tags(html
):
57 Remove HTML tags from a string and replace numeric and
58 named entities with the corresponding character, so the
59 HTML text can be displayed in a simple text view.
64 # If we would want more speed, we could make these global
65 re_strip_tags
= re
.compile('<[^>]*>')
66 re_unicode_entities
= re
.compile('&#(\d{2,4});')
67 re_html_entities
= re
.compile('&(.{2,8});')
68 re_newline_tags
= re
.compile('(<br[^>]*>|<[/]?ul[^>]*>|</li>)', re
.I
)
69 re_listing_tags
= re
.compile('<li[^>]*>', re
.I
)
73 # Convert common HTML elements to their text equivalent
74 result
= re_newline_tags
.sub('\n', result
)
75 result
= re_listing_tags
.sub('\n * ', result
)
76 result
= re
.sub('<[Pp]>', '\n\n', result
)
78 # Remove all HTML/XML tags from the string
79 result
= re_strip_tags
.sub('', result
)
80 # Convert numeric XML entities to their unicode character
81 result
= re_unicode_entities
.sub(lambda x
: unichr(int(x
.group(1))), result
)
83 # Convert named HTML entities to their unicode character
84 result
= re_html_entities
.sub(lambda x
: unicode(entitydefs
.get(x
.group(1),''), 'iso-8859-1'), result
)
86 # Convert more than two newlines to two newlines
87 result
= re
.sub('([\r\n]{2})([\r\n])+', '\\1', result
)