2 # -*- coding: utf-8 -*-
4 # This file is part of my.gpodder.org.
6 # my.gpodder.org is free software: you can redistribute it and/or modify it
7 # under the terms of the GNU Affero General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or (at your
9 # option) any later version.
11 # my.gpodder.org is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
14 # License for more details.
16 # You should have received a copy of the GNU Affero General Public License
17 # along with my.gpodder.org. If not, see <http://www.gnu.org/licenses/>.
# User-Agent string sent with every HTTP request issued by this module's fetcher
USER_AGENT = 'mygpo crawler (+http://my.gpodder.org)'
32 from utils
import parse_time
34 from mimetype
import get_mimetype
, check_mimetype
, get_podcast_types
#socket.setdefaulttimeout(10)
# Module-level feed fetcher shared by all parse_feed() calls; it identifies
# itself to remote servers with USER_AGENT.
fetcher = feedcore.Fetcher(USER_AGENT)
def get_episode_files(entry):
    """Get the download / episode URL of a feedparser entry.

    Returns a dict mapping each usable media URL to a
    (mimetype, filesize) tuple; filesize is None when the feed does
    not provide a valid length for that URL.
    """
    urls = {}

    # Regular enclosures (<enclosure href=... type=... length=...>)
    enclosures = getattr(entry, 'enclosures', [])
    for enclosure in enclosures:
        if 'href' not in enclosure:
            continue
        mimetype = get_mimetype(enclosure.get('type', ''), enclosure['href'])
        if check_mimetype(mimetype):
            try:
                filesize = int(enclosure['length'])
            except (KeyError, ValueError, TypeError):
                # feeds frequently omit or garble the length attribute;
                # the original unguarded int() call crashed on those
                filesize = None
            urls[enclosure['href']] = (mimetype, filesize)

    # Media RSS content (<media:content url=... type=...>)
    media_content = getattr(entry, 'media_content', [])
    for media in media_content:
        if 'url' not in media:
            continue
        mimetype = get_mimetype(media.get('type', ''), media['url'])
        if check_mimetype(mimetype):
            urls[media['url']] = (mimetype, None)

    # Plain links: currently only YouTube video links are recognized
    links = getattr(entry, 'links', [])
    for link in links:
        if not hasattr(link, 'href'):
            continue
        if youtube.is_video_link(link['href']):
            urls[link['href']] = ('application/x-youtube', None)
        # XXX: Implement link detection as in gPodder

    return urls
def get_episode_summary(entry):
    """Return the first non-empty summary-like field of the entry.

    Checks 'summary', 'subtitle' and 'link' in that order; returns an
    empty string when none of them carries a truthy value.
    """
    for key in ('summary', 'subtitle', 'link'):
        value = entry.get(key, None)
        if value:
            return value
    return ''
def get_duration(entry):
    """Return the episode duration in seconds, parsed from the
    'itunes_duration' field; 0 when the value is missing or unparsable."""
    # renamed from 'str' in the original, which shadowed the builtin
    duration = entry.get('itunes_duration', '')
    try:
        return parse_time(duration)
    except (ValueError, TypeError):
        # feeds routinely ship malformed duration strings
        return 0
def get_feed_tags(feed):
    """Collect the tags of a feed as a set of strings.

    Each feedparser tag contributes its comma-separated 'term' parts
    and its 'label'; missing or empty fields are skipped (the original
    indexed the keys directly, which raises KeyError on partial tags).
    """
    tags = []

    for tag in feed.get('tags', []):
        term = tag.get('term')
        if term:
            tags.extend([t for t in term.split(',') if t])

        label = tag.get('label')
        if label:
            tags.append(label)

    return set(tags)
def update_feed_tags(podcast, tags):
    """Synchronise the stored tags of *podcast* with *tags* from the feed.

    NOTE(review): every persistence line in the original body is
    commented out, so this function is currently a documented no-op;
    the commented code is kept as a template for re-enabling it.
    """
    #delete all tags not found in the feed anymore
    #PodcastTag.objects.filter(podcast=podcast, source=src).exclude(tag__in=tags).delete()

    #create new found tags
    #for tag in tags:
    #    if not PodcastTag.objects.filter(podcast=podcast, source=src, tag=tag).exists():
    #        PodcastTag.objects.get_or_create(podcast=podcast, source=src, tag=tag)
def get_episode_metadata(entry, files):
    """Build the metadata dict for one feed entry.

    *files* maps URL -> (mimetype, filesize), as returned by
    get_episode_files().  'url'/'mimetype'/'filesize' describe the
    first file; they are None when *files* is empty (the original
    indexed files.keys()[0] and crashed on episodes without files).
    """
    # pick the primary file once, guarding against an empty dict
    if files:
        first_url = next(iter(files))
        first_mimetype, first_filesize = files[first_url]
    else:
        first_url = first_mimetype = first_filesize = None

    d = {
        'title': entry.get('title', entry.get('link', '')),
        'description': get_episode_summary(entry),
        'link': entry.get('link', ''),
        'author': entry.get('author', entry.get('itunes_author', '')),
        'duration': get_duration(entry),
        'language': entry.get('language', ''),
        'files': [dict(url=k, mimetype=v[0], filesize=v[1])
                  for (k, v) in files.items()],
        'url': first_url,
        'filesize': first_filesize,
        'mimetype': first_mimetype,
    }

    try:
        d['timestamp'] = datetime.datetime(
            *(entry.updated_parsed)[:6]).strftime('%Y-%m-%dT%H:%M:%S')
    except (AttributeError, TypeError, ValueError):
        # entry carries no usable parsed update time
        d['timestamp'] = None

    return d
def parse_feed(feed_url, inline_logo):
    """Fetch and parse the podcast feed at *feed_url*.

    Returns a dict with the podcast's metadata and its episodes, or
    None when the feed cannot be fetched or parsed.  When *inline_logo*
    is true, the logo is additionally embedded as a data: URI under
    the 'logo_data' key.
    """
    try:
        fetcher.fetch(feed_url)

    except (feedcore.Offline, feedcore.InvalidFeed,
            feedcore.WifiLogin, feedcore.AuthenticationRequired):
        return None

    except feedcore.NewLocation as location:
        # follow the redirect, keeping the caller's inline_logo setting
        # (the original dropped this argument -- bug fix)
        return parse_feed(location.data, inline_logo)

    except feedcore.UpdatedFeed as updated:
        # feedcore signals a successfully parsed feed via this exception
        feed = updated.data

        podcast = {}
        podcast['title'] = feed.feed.get('title', '')
        podcast['link'] = feed.feed.get('link', '')
        podcast['description'] = feed.feed.get('subtitle', '')
        podcast['author'] = feed.feed.get('author',
                feed.feed.get('itunes_author', ''))
        podcast['language'] = feed.feed.get('language', '')

        logo_url = get_podcast_logo(feed)
        podcast['logo'] = logo_url
        if inline_logo and logo_url:
            podcast['logo_data'] = get_data_uri(logo_url)

        #update_feed_tags(podcast, get_feed_tags(feed.feed))

        podcast['episodes'] = []
        for entry in feed.entries:
            urls = get_episode_files(entry)
            episode = get_episode_metadata(entry, urls)
            podcast['episodes'].append(episode)

        podcast['content_types'] = get_podcast_types(podcast)

        return podcast

    except Exception as e:
        # 'print >>sys.stderr' replaced with write() so the module is
        # also valid Python 3 syntax
        sys.stderr.write('Exception: %s\n' % e)

    return None
def get_podcast_logo(feed):
    """Return the cover-art URL for a parsed feed, or None.

    Prefers the feed's own image element; a YouTube cover (looked up
    via the feed link) overrides it when available.
    """
    cover_art = None

    image = feed.feed.get('image', None)
    if image is not None:
        # feedparser exposes the URL under 'href' or 'url' depending on
        # the feed format; take the first one that is set
        for key in ('href', 'url'):
            cover_art = getattr(image, key, None)
            if cover_art:
                break

    # YouTube channels carry their cover outside the regular image element
    yturl = youtube.get_real_cover(feed.feed.link)
    if yturl:
        cover_art = yturl

    return cover_art
def get_data_uri(url):
    """Download *url* and return its content as a data: URI string
    ('data:<mimetype>;base64,<payload>')."""
    # close the HTTP response explicitly -- the original leaked the handle
    response = urllib2.urlopen(url)
    try:
        content = response.read()
    finally:
        response.close()

    mimetype = get_mimetype(None, url)
    encoded = base64.b64encode(content)
    return 'data:%s;base64,%s' % (mimetype, encoded)