1 from .common
import InfoExtractor
11 class LibsynIE(InfoExtractor
):
12 _VALID_URL
= r
'(?P<mainurl>https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+))'
13 _EMBED_REGEX
= [r
'<iframe[^>]+src=(["\'])(?P
<url
>(?
:https?
:)?
//html5
-player\
.libsyn\
.com
/embed
/.+?
)\
1']
16 'url
': 'http
://html5
-player
.libsyn
.com
/embed
/episode
/id/6385796/',
17 'md5
': '2a55e75496c790cdeb058e7e6c087746
',
21 'title
': 'Champion Minded
- Developing a Growth Mindset
',
22 # description fetched using another request:
23 # http://html5-player.libsyn.com/embed/getitemdetails?item_id=6385796
24 # 'description
': 'In this episode
, Allistair talks about the importance of developing a growth mindset
, not only
in sports
, but
in life too
.',
25 'upload_date
': '20180320',
26 'thumbnail
': 're
:^https?
://.*',
29 'url
': 'https
://html5
-player
.libsyn
.com
/embed
/episode
/id/3727166/height
/75/width
/200/theme
/standard
/direction
/no
/autoplay
/no
/autonext
/no
/thumbnail
/no
/preload
/no
/no_addthis
/no
/',
30 'md5
': '6c5cb21acd622d754d3b1a92b582ce42
',
34 'title
': 'Clients From Hell Podcast
- How a Sex Toy Company Kickstarted my Freelance Career
',
35 'upload_date
': '20150818',
36 'thumbnail
': 're
:^https?
://.*',
40 def _real_extract(self, url):
41 url, video_id = self._match_valid_url(url).groups()
42 webpage = self._download_webpage(url, video_id)
44 data = self._parse_json(self._search_regex(
45 r'var\s
+playlistItem\s
*=\s
*({.+?
});',
46 webpage, 'JSON data block
'), video_id)
48 episode_title = data.get('item_title
') or get_element_by_class('episode
-title
', webpage)
51 [r'data
-title
="([^"]+)"', r'<title>(.+?)</title>'],
52 webpage, 'episode title')
53 episode_title = episode_title.strip()
55 podcast_title = strip_or_none(clean_html(self._search_regex(
56 r'<h3>([^<]+)</h3>', webpage, 'podcast title',
57 default=None) or get_element_by_class('podcast-title', webpage)))
59 title = f'{podcast_title} - {episode_title}' if podcast_title else episode_title
62 for k, format_id in (('media_url_libsyn', 'libsyn'), ('media_url', 'main'), ('download_link', 'download')):
68 'format_id': format_id,
71 description = self._html_search_regex(
72 r'<p\s+id="info_text_body
">(.+?)</p>', webpage,
73 'description', default=None)
75 # Strip non-breaking and normal spaces
76 description = description.replace('\u00A0', ' ').strip()
77 release_date = unified_strdate(self._search_regex(
78 r'<div class="release_date
">Released: ([^<]+)<',
79 webpage, 'release date', default=None) or data.get('release_date'))
84 'description': description,
85 'thumbnail': data.get('thumbnail_url'),
86 'upload_date': release_date,
87 'duration': parse_duration(data.get('duration')),