add option to inline logos
[mygpo-feedservice.git] / feedservice / feeddownloader.py
bloba5352513e313200a06c4bce89149c67cec3eabcf
1 #!/usr/bin/python
2 # -*- coding: utf-8 -*-
4 # This file is part of my.gpodder.org.
6 # my.gpodder.org is free software: you can redistribute it and/or modify it
7 # under the terms of the GNU Affero General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or (at your
9 # option) any later version.
11 # my.gpodder.org is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
14 # License for more details.
16 # You should have received a copy of the GNU Affero General Public License
17 # along with my.gpodder.org. If not, see <http://www.gnu.org/licenses/>.
# User-Agent header sent with every outgoing HTTP request of this service
USER_AGENT = 'mygpo crawler (+http://my.gpodder.org)'
23 import os
24 import sys
25 import datetime
26 import hashlib
27 import urllib2
28 import base64
29 #import socket
31 import feedcore
32 from utils import parse_time
33 import youtube
34 from mimetype import get_mimetype, check_mimetype, get_podcast_types
36 #socket.setdefaulttimeout(10)
# Shared module-level fetcher instance used by parse_feed() below
fetcher = feedcore.Fetcher(USER_AGENT)
def get_episode_files(entry):
    """Get the download / episode URLs of a feedparser entry.

    Returns a dict mapping each URL to a (mimetype, filesize) tuple,
    where filesize is an int or None when unknown.
    """

    urls = {}

    # Enclosures are the canonical way feeds link their media files
    enclosures = getattr(entry, 'enclosures', [])
    for enclosure in enclosures:
        if 'href' in enclosure:
            mimetype = get_mimetype(enclosure.get('type', ''), enclosure['href'])
            if check_mimetype(mimetype):
                try:
                    # 'length' may be missing, None or non-numeric; a
                    # broken size must not discard the enclosure
                    filesize = int(enclosure['length'])
                except (KeyError, TypeError, ValueError):
                    filesize = None
                urls[enclosure['href']] = (mimetype, filesize)

    # Media-RSS content is an alternative enclosure mechanism
    media_content = getattr(entry, 'media_content', [])
    for media in media_content:
        if 'url' in media:
            mimetype = get_mimetype(media.get('type', ''), media['url'])
            if check_mimetype(mimetype):
                urls[media['url']] = (mimetype, None)

    links = getattr(entry, 'links', [])
    for link in links:
        if not hasattr(link, 'href'):
            continue

        # YouTube page links are playable through a pseudo mimetype
        if youtube.is_video_link(link['href']):
            urls[link['href']] = ('application/x-youtube', None)

    # XXX: Implement link detection as in gPodder

    return urls
def get_episode_summary(entry):
    """Return the first non-empty summary-like field of *entry*.

    Falls back from 'summary' over 'subtitle' to 'link'; returns an
    empty string when none of them has a value.
    """
    candidates = (entry.get(field, None)
                  for field in ('summary', 'subtitle', 'link'))
    return next((text for text in candidates if text), '')
def get_duration(entry):
    """Return the episode duration in seconds.

    Parses the entry's 'itunes_duration' value; returns 0 when it is
    missing or unparsable.
    """
    # renamed from 'str', which shadowed the builtin
    duration = entry.get('itunes_duration', '')

    try:
        return parse_time(duration)
    except ValueError:
        return 0
def get_feed_tags(feed):
    """Return the set of tags of a parsed feed.

    Each feedparser tag entry may carry a comma-separated 'term' and/or
    a 'label'; either key can be absent, so use .get() instead of
    indexing (plain indexing raised KeyError on partial tag entries).
    """
    tags = []

    for tag in feed.get('tags', []):
        term = tag.get('term')
        if term:
            tags.extend([t for t in term.split(',') if t])

        label = tag.get('label')
        if label:
            tags.append(label)

    return set(tags)
def update_feed_tags(podcast, tags):
    """Sync the podcast's stored tags with *tags* found in the feed.

    Currently a no-op: the Django ORM calls below are commented out
    (the stand-alone feed service has no database), but they are kept
    to document the intended behavior.
    """
    src = 'feed'

    #delete all tags not found in the feed anymore
    #PodcastTag.objects.filter(podcast=podcast, source=src).exclude(tag__in=tags).delete()

    #create new found tags
    #for tag in tags:
    #    if not PodcastTag.objects.filter(podcast=podcast, source=src, tag=tag).exists():
    #        PodcastTag.objects.get_or_create(podcast=podcast, source=src, tag=tag)
def get_episode_metadata(entry, files):
    """Build the metadata dict for a single feed entry.

    files: dict mapping url -> (mimetype, filesize), as returned by
    get_episode_files(); callers guarantee it is non-empty.
    """
    # Pick one consistent "primary" file for the legacy single-file
    # fields.  The old files.keys()[0] / files.values()[0] pair was
    # Python-2-only and indexed the dict twice.
    primary_url = next(iter(files))
    primary_mimetype, primary_filesize = files[primary_url]

    d = {
        'title': entry.get('title', entry.get('link', '')),
        'description': get_episode_summary(entry),
        'link': entry.get('link', ''),
        'timestamp': None,
        'author': entry.get('author', entry.get('itunes_author', '')),
        'duration': get_duration(entry),
        'language': entry.get('language', ''),
        'files': [dict(url=url, mimetype=mime, filesize=size)
                  for (url, (mime, size)) in files.items()],
        'url': primary_url,
        'filesize': primary_filesize,
        'mimetype': primary_mimetype,
    }
    try:
        # entries without a parsable publication date have no
        # 'updated_parsed' attribute; an invalid date raises ValueError
        d['timestamp'] = datetime.datetime(*(entry.updated_parsed)[:6]).strftime('%Y-%m-%dT%H:%M:%S')
    except (AttributeError, TypeError, ValueError):
        # was a bare except:, which also swallowed KeyboardInterrupt
        d['timestamp'] = None

    return d
def parse_feed(feed_url, inline_logo):
    """Fetch and parse the podcast feed at *feed_url*.

    Returns a dict with the podcast metadata and its 'episodes' list,
    or None when the feed cannot be fetched or parsed.  When
    *inline_logo* is true, the logo image is additionally embedded as
    a data: URI under 'logo_data'.
    """
    # Defined up-front: the old code raised NameError at the final
    # return when an exception branch never assigned 'podcast'.
    podcast = None

    try:
        fetcher.fetch(feed_url)

    except (feedcore.Offline, feedcore.InvalidFeed, feedcore.WifiLogin,
            feedcore.AuthenticationRequired):
        pass

    except feedcore.NewLocation as location:
        # Follow the redirect.  Bug fix: the recursive call used to
        # drop the required inline_logo argument (TypeError on every
        # redirected feed).
        return parse_feed(location.data, inline_logo)

    except feedcore.UpdatedFeed as updated:
        # feedcore signals a successfully fetched feed via this
        # exception; the parsed result is in updated.data
        feed = updated.data
        podcast = dict()
        podcast['title'] = feed.feed.get('title', '')
        podcast['link'] = feed.feed.get('link', '')
        podcast['description'] = feed.feed.get('subtitle', '')
        podcast['author'] = feed.feed.get('author', feed.feed.get('itunes_author', ''))
        podcast['language'] = feed.feed.get('language', '')

        logo_url = get_podcast_logo(feed)
        podcast['logo'] = logo_url
        if inline_logo and logo_url:
            podcast['logo_data'] = get_data_uri(logo_url)

        #update_feed_tags(podcast, get_feed_tags(feed.feed))

        podcast['episodes'] = []
        for entry in feed.entries:
            urls = get_episode_files(entry)
            if not urls:
                continue

            e = get_episode_metadata(entry, urls)
            podcast['episodes'].append(e)

        podcast['content_types'] = get_podcast_types(podcast)

    except Exception as e:
        # best-effort service: log the failure and return None
        sys.stderr.write('Exception: %s\n' % e)

    return podcast
def get_podcast_logo(feed):
    """Return the cover-art URL of a parsed feed, or None.

    Prefers the feed's image element; for YouTube feeds the real
    channel cover (looked up via the feed link) takes precedence.
    """
    cover_art = None
    image = feed.feed.get('image', None)
    if image is not None:
        # feedparser stores the image URL under 'href' or 'url'
        # depending on the feed format
        for key in ('href', 'url'):
            cover_art = getattr(image, key, None)
            if cover_art:
                break

    # Bug fix: feed.feed.link raised AttributeError for feeds without
    # a <link> element; only do the YouTube lookup when a link exists.
    link = feed.feed.get('link', None)
    if link:
        yturl = youtube.get_real_cover(link)
        if yturl:
            cover_art = yturl

    return cover_art
def get_data_uri(url):
    """Download the resource at *url* and return it as a data: URI.

    The mimetype is guessed from the URL's file extension.
    """
    # close the HTTP response explicitly instead of leaking the
    # connection until garbage collection
    response = urllib2.urlopen(url)
    try:
        content = response.read()
    finally:
        response.close()

    mimetype = get_mimetype(None, url)
    encoded = base64.b64encode(content)
    return 'data:%s;base64,%s' % (mimetype, encoded)