1 from collections
import defaultdict
4 # If 20% of the episodes of a podcast are of a given type,
5 # then the podcast is considered to be of that type, too
9 CONTENT_TYPES
= ('image', 'audio', 'video')
def get_podcast_types(podcast):
    """Return the content types that the given podcast is considered to have.

    Every episode is classified by passing the mimetype of its first file
    (or '' when that file has no 'mimetype' entry) to get_type(). A type is
    attributed to the podcast when the share of episodes classified as
    that type is at least TYPE_THRESHOLD.

    podcast -- mapping whose 'episodes' entry is an iterable of episode
               mappings; each episode needs a non-empty 'files' sequence
               of mappings.

    Returns a list of types, most frequent first. The list is empty when
    the podcast has no episodes. Whatever get_type() yields for a mimetype
    it does not recognise is counted like any other type.

    Raises KeyError / IndexError if an episode has no 'files' entry or an
    empty one.
    """
    # Number of episodes per simplified type.
    types = {}
    for e in podcast['episodes']:
        t = get_type(e['files'][0].get('mimetype', ''))
        types[t] = types.get(t, 0) + 1

    # values()/items() instead of itervalues()/iteritems() so that the
    # function runs on Python 2 and Python 3 alike.
    max_episodes = sum(types.values())
    ranked = sorted(types.items(), key=lambda x: x[1], reverse=True)

    # The share of a type is count / total. Dividing the other way round
    # yields a value >= 1 for every type, which would let all of them pass
    # the threshold. float() keeps Python 2 from doing integer division.
    # max_episodes can only be 0 when ranked is empty, so no division by
    # zero can happen here.
    return [t for t, count in ranked
            if count / float(max_episodes) >= TYPE_THRESHOLD]
28 def get_type(mimetype
):
29 """Returns the simplified type for the given mimetype
31 All "wanted" mimetypes are mapped to one of audio/video/image
32 Everything else returns None
38 category
, type = mimetype
.split('/', 1)
39 if category
in ('audio', 'video', 'image'):
43 elif type == 'x-youtube':
47 def check_mimetype(mimetype
):
48 """Checks if the given mimetype can be processed by mygpo
51 category
, type = mimetype
.split('/', 1)
52 if category
in ('audio', 'video', 'image'):
55 # application/ogg is a valid mime type for Ogg files
56 # but we do not want to accept all files with application category
65 def get_mimetype(mimetype
, url
):
66 """Returns the mimetype; if None is given it tries to guess it"""
69 mimetype
, _encoding
= mimetypes
.guess_type(url
)