add episode fields 'short_title' and 'number'
[mygpo-feedservice.git] / feedservice / mimetype.py
blob5e480a50fe840bf2cbc6c100ac2bea87c5dee225
1 from collections import defaultdict
2 import mimetypes
# Share of a podcast's episodes (20%) that has to be of a given type
# for the podcast as a whole to be considered to be of that type, too
TYPE_THRESHOLD = 0.2


# The simplified content categories; these are the values that
# get_type() maps the "wanted" mimetypes to
CONTENT_TYPES = ('image', 'audio', 'video')
def get_podcast_types(podcast):
    """Returns the types of a podcast

    A podcast is considered to be of a given type if the share of its
    episodes that are of that type is at least TYPE_THRESHOLD.

    podcast is a mapping whose 'episodes' entry is an iterable of episode
    dicts. The type of an episode is derived (via get_type) from the
    'mimetype' of the first entry of its 'files' list.

    Returns a list of type names, most frequent first. Episodes without
    files, or whose type can not be determined, count towards the total
    number of episodes but never show up as a type themselves. An empty
    list is returned for a podcast without episodes.
    """
    counts = defaultdict(int)
    total = 0

    for episode in podcast['episodes']:
        total += 1

        # an episode without any files has no type, but is still an episode
        files = episode.get('files')
        if not files:
            continue

        episode_type = get_type(files[0].get('mimetype', ''))
        if episode_type is not None:
            counts[episode_type] += 1

    if not total:
        return []

    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)

    # the ratio is episodes-of-type / all-episodes; the reverse would
    # always be >= 1 and let every type pass the threshold
    return [name for name, count in ranked
            if count / float(total) >= TYPE_THRESHOLD]
def get_type(mimetype):
    """Returns the simplified type for the given mimetype

    All "wanted" mimetypes are mapped to one of audio/video/image
    Everything else (including None, '' and strings that are not of the
    form category/subtype) returns None

    The comparison ignores case, surrounding whitespace and any
    parameters following a ';' (eg 'Application/OGG; codecs=vorbis' is
    treated like 'application/ogg').
    """
    if not mimetype:
        return None

    # only category/subtype is of interest, not the parameters
    essence = mimetype.split(';', 1)[0].strip().lower()
    if '/' not in essence:
        return None

    category, subtype = essence.split('/', 1)

    if category in ('audio', 'video', 'image'):
        return category

    # some wanted types are not filed under a category of their own
    if subtype == 'ogg':
        return 'audio'

    if subtype == 'x-youtube':
        return 'video'

    return None
def check_mimetype(mimetype):
    """Checks if the given mimetype can be processed by mygpo

    Returns True for all audio, video and image mimetypes and for every
    mimetype with the subtype 'ogg'; False for everything else, including
    None, '' and strings that are not of the form category/subtype.

    The comparison ignores case, surrounding whitespace and any
    parameters following a ';'.
    """
    if not mimetype:
        return False

    # only category/subtype is of interest, not the parameters
    essence = mimetype.split(';', 1)[0].strip().lower()
    if '/' not in essence:
        return False

    category, subtype = essence.split('/', 1)

    if category in ('audio', 'video', 'image'):
        return True

    # application/ogg is a valid mime type for Ogg files
    # but we do not want to accept all files with application category
    return subtype in ('ogg', )
def get_mimetype(mimetype, url):
    """Returns the mimetype; if None is given it tries to guess it

    A non-empty mimetype is returned unchanged. Otherwise the mimetype is
    guessed from url using mimetypes.guess_type(). None is returned if
    nothing can be guessed, or if there is no url to guess from.
    """

    if not mimetype:
        # guess_type() can not deal with a missing url
        if not url:
            return None

        mimetype, _encoding = mimetypes.guess_type(url)

    return mimetype