from .common import InfoExtractor
from ..utils import (
    OnDemandPagedList,
    clean_html,
    int_or_none,
    jwt_decode_hs256,
    url_or_none,
)
from ..utils.traversal import traverse_obj


def result_from_props(props):
    """Build an info dict from a Podbay episode ``props`` mapping.

    Each output field is read from ``props`` with ``traverse_obj`` and passed
    through the filter named in its path (``str``, ``url_or_none``,
    ``clean_html``, ``int_or_none``).

    NOTE(review): the extracted source for this function was missing lines
    (the ``return {`` opener, the closing brackets and at least one mapping
    entry). The ``**`` unpacking suggests the original dict literal may also
    have carried static keys after the unpacked mapping; none were visible,
    so none are restored here -- confirm against version control.
    """
    return {
        **traverse_obj(props, {
            # NOTE(review): the 'id' entry was not visible in the extracted
            # source; the episode test expects an id, and '_id' is presumed
            # to be the field carrying it -- confirm.
            'id': ('_id', {str}),
            'title': ('title', {str}),
            'url': ('mediaURL', {url_or_none}),
            'description': ('description', {clean_html}),
            # 'image' is decoded with jwt_decode_hs256 and the 'url' entry of
            # the decoded result is used as the thumbnail.
            'thumbnail': ('image', {jwt_decode_hs256}, 'url', {url_or_none}),
            'timestamp': ('timestamp', {int_or_none}),
            'duration': ('duration', {int_or_none}),
        }),
    }
# Extractor for a single podbay.fm episode page (/p/<podcast>/e/<digits>).
class PodbayFMIE(InfoExtractor):
    _VALID_URL = r'https?://podbay\.fm/p/[^/?#]+/e/(?P<id>\d+)'
    # NOTE(review): the `_TESTS` / 'info_dict' container lines were lost in
    # the extracted source and are restored following the usual yt-dlp test
    # layout; expected fields that were not visible are not re-added.
    _TESTS = [{
        'url': 'https://podbay.fm/p/behind-the-bastards/e/1647338400',
        'md5': '895ac8505de349515f5ee8a4a3195c93',
        'info_dict': {
            'id': '62306451f4a48e58d0c4d6a8',
            'title': 'Part One: Kissinger',
            'description': r're:^We begin our epic six part series on Henry Kissinger.+',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1647338400,
            'upload_date': '20220315',
        },
    }]

    def _real_extract(self, url):
        """Download the episode page and convert its Next.js data to an info dict."""
        episode_id = self._match_id(url)
        webpage = self._download_webpage(url, episode_id)
        data = self._search_nextjs_data(webpage, episode_id)
        # The episode description lives under props -> pageProps -> episode;
        # a missing key raises KeyError here, exactly as plain indexing does.
        return result_from_props(data['props']['pageProps']['episode'])
# Extractor for a podbay.fm podcast (channel) page; yields its episodes as a
# lazily paged playlist.
class PodbayFMChannelIE(InfoExtractor):
    _VALID_URL = r'https?://podbay\.fm/p/(?P<id>[^/?#]+)/?(?:$|[?#])'
    # NOTE(review): the `_TESTS` / 'info_dict' container lines were lost in
    # the extracted source and are restored following the usual yt-dlp test
    # layout.
    _TESTS = [{
        'url': 'https://podbay.fm/p/behind-the-bastards',
        'info_dict': {
            'id': 'behind-the-bastards',
            'title': 'Behind the Bastards',
        },
        'playlist_mincount': 21,
    }]
    # NOTE(review): the page-size argument of the OnDemandPagedList call was
    # not visible in the extracted source; 10 is assumed -- confirm it matches
    # the number of episodes the API returns per page.
    _PAGE_SIZE = 10

    def _fetch_page(self, channel_id, pagenum):
        """Return the 'podcast' object of API page ``pagenum`` (0-based) for the channel."""
        return self._download_json(
            f'https://podbay.fm/api/podcast?reverse=true&page={pagenum}&slug={channel_id}',
            f'Downloading channel JSON page {pagenum + 1}', channel_id)['podcast']

    # Declared without `self` but invoked as `self._results_from_page(...)`,
    # so it has to be a staticmethod for the call to bind correctly.
    @staticmethod
    def _results_from_page(channel_id, page):
        """Turn one API page into a list of episode info dicts."""
        return [{
            **result_from_props(e),
            'extractor': PodbayFMIE.IE_NAME,
            'extractor_key': PodbayFMIE.ie_key(),
            # somehow they use timestamps as the episode identifier
            'webpage_url': f'https://podbay.fm/p/{channel_id}/e/{e["timestamp"]}',
        } for e in page['episodes']]

    def _real_extract(self, url):
        """Return a playlist of the channel's episodes, titled from the first API page."""
        channel_id = self._match_id(url)

        # Page 0 is fetched eagerly because the playlist title is read from
        # it; the paged list reuses it instead of downloading it again.
        first_page = self._fetch_page(channel_id, 0)
        entries = OnDemandPagedList(
            lambda pagenum: self._results_from_page(
                channel_id, self._fetch_page(channel_id, pagenum) if pagenum else first_page),
            self._PAGE_SIZE)

        return self.playlist_result(entries, channel_id, first_page.get('title'))