4 from .common
import InfoExtractor
15 class PodchaserIE(InfoExtractor
):
16 _VALID_URL
= r
'https?://(?:www\.)?podchaser\.com/podcasts/[\w-]+-(?P<podcast_id>\d+)(?:/episodes/[\w-]+-(?P<id>\d+))?'
19 'url': 'https://www.podchaser.com/podcasts/cum-town-36924/episodes/ep-285-freeze-me-off-104365585',
22 'title': 'Ep. 285 – freeze me off',
23 'description': 'cam ahn',
24 'thumbnail': r
're:^https?://.*\.jpg$',
26 'categories': ['Comedy'],
27 'tags': ['comedy', 'dark humor'],
30 'timestamp': 1636531259,
31 'upload_date': '20211110',
35 'url': 'https://www.podchaser.com/podcasts/the-bone-zone-28853',
38 'title': 'The Bone Zone',
39 'description': 'Podcast by The Bone Zone',
43 'url': 'https://www.podchaser.com/podcasts/sean-carrolls-mindscape-scienc-699349/episodes',
46 'title': 'Sean Carroll\'s Mindscape: Science, Society, Philosophy, Culture, Arts, and Ideas',
47 'description': 'md5:2cbd8f4749891a84dc8235342e0b5ff1'
49 'playlist_mincount': 225
def _parse_episode(episode, podcast):
    """Build the info dict for one episode.

    `episode` supplies the per-episode fields (id, title, description,
    audio URL, image URL, length, air date, rating); `podcast` supplies the
    fields shared by every episode of the show (categories, tags, series
    title). Both are read with dict-style `.get()` / `traverse_obj`, so
    missing keys yield None (or an empty list) instead of raising.
    """
    # NOTE(review): callers invoke this as self._parse_episode(episode, podcast),
    # so it has to be a @staticmethod; the decorator line is not visible in
    # this excerpt -- confirm it is still present above the def.

    # Categories are looked up both under podcast['summary'] and on the
    # podcast object itself, so the same text can appear twice.
    category_texts = traverse_obj(
        podcast, (('summary', None), 'categories', ..., 'text'))

    return {
        # str_or_none keeps a missing id as None instead of the string 'None'.
        'id': str_or_none(episode.get('id')),
        'title': episode.get('title'),
        'description': episode.get('description'),
        'url': episode.get('audio_url'),
        'thumbnail': episode.get('image_url'),
        'duration': str_to_int(episode.get('length')),
        'timestamp': unified_timestamp(episode.get('air_date')),
        'average_rating': float_or_none(episode.get('rating')),
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # result is deterministic (a set would not be).
        'categories': list(dict.fromkeys(category_texts)),
        'tags': traverse_obj(podcast, ('tags', ..., 'text')),
        'series': podcast.get('title'),
    }
68 def _call_api(self
, path
, *args
, **kwargs
):
69 return self
._download
_json
(f
'https://api.podchaser.com/{path}', *args
, **kwargs
)
# Generator producing the entries for one page of a podcast's episode list.
#
# It calls self._call_api('list/episode', podcast_id, ...) with a JSON
# Content-Type header, then yields self._parse_episode(episode, podcast) for
# every item in the response's 'entities' list (a missing 'entities' key
# raises KeyError, since it is indexed directly).
#
# Paging values visible here: 'start' is page * self._PAGE_SIZE, 'count' is
# self._PAGE_SIZE, 'sort_order' is fixed to 'SORT_ORDER_RECENT' and
# 'podcast_id' is the podcast_id argument.
# NOTE(review): page is presumably the zero-based index handed in by the
# paged-list helper in _real_extract -- confirm.
# NOTE(review): the expression that wraps these key/value pairs into the
# request body, and the definition of _PAGE_SIZE, are not visible in this
# excerpt -- check them against the full file before editing this call.
71 def _fetch_page(self
, podcast_id
, podcast
, page
):
72 json_response
= self
._call
_api
(
73 'list/episode', podcast_id
,
74 headers
={'Content-Type': 'application/json;charset=utf-8'},
76 'start': page
* self
._PAGE
_SIZE
,
77 'count': self
._PAGE
_SIZE
,
78 'sort_order': 'SORT_ORDER_RECENT',
80 'podcast_id': podcast_id
85 for episode
in json_response
['entities']:
86 yield self
._parse
_episode
(episode
, podcast
)
88 def _real_extract(self
, url
):
89 podcast_id
, episode_id
= self
._match
_valid
_url
(url
).group('podcast_id', 'id')
90 podcast
= self
._call
_api
(f
'podcasts/{podcast_id}', episode_id
or podcast_id
)
92 return self
.playlist_result(
93 OnDemandPagedList(functools
.partial(self
._fetch
_page
, podcast_id
, podcast
), self
._PAGE
_SIZE
),
94 str_or_none(podcast
.get('id')), podcast
.get('title'), podcast
.get('description'))
96 episode
= self
._call
_api
(f
'episodes/{episode_id}', episode_id
)
97 return self
._parse
_episode
(episode
, podcast
)