3 from .common
import InfoExtractor
class CamFMShowIE(InfoExtractor):
    """Extract a Cam FM show page as a collection of its episode player links.

    NOTE(review): this class was rebuilt from an excerpt in which statements
    were split across lines and some lines were not visible.  Every line
    tagged "reconstructed" below is an assumption based on the surrounding
    keys - confirm against the canonical file before merging.
    """

    _VALID_URL = r'https?://(?:www\.)?camfm\.co\.uk/shows/(?P<id>[^/]+)'
    _TESTS = [{  # reconstructed wrapper
        'playlist_mincount': 5,
        'url': 'https://camfm.co.uk/shows/soul-mining/',
        'info_dict': {  # reconstructed wrapper
            'id': 'soul-mining',  # reconstructed: the <id> group of the URL above
            'thumbnail': 'md5:6a873091f92c936f23bdcce80f75e66a',
            'title': 'Soul Mining',
            'description': 'Telling the stories of jazz, funk and soul from all corners of the world.',
        },
    }]

    def _real_extract(self, url):
        """Download the show page and list every episode player link on it.

        Each ``javascript:popup('/player/...', 'listen'`` link found in the
        page becomes one entry delegated to ``CamFMEpisodeIE``.  Thumbnail and
        title are looked up with ``fatal=False``, so a page lacking them still
        yields a result.
        """
        show_id = self._match_id(url)
        page = self._download_webpage(url, show_id)

        return {
            '_type': 'playlist',  # reconstructed: not visible in the excerpt
            'id': show_id,  # reconstructed: not visible in the excerpt
            'entries': [self.url_result(urljoin('https://camfm.co.uk', i), CamFMEpisodeIE)
                        for i in re.findall(r"javascript:popup\('(/player/[^']+)', 'listen'", page)],
            # The page gives a site-relative image path, hence the urljoin.
            'thumbnail': urljoin('https://camfm.co.uk', self._search_regex(
                r'<img[^>]+class="thumb-expand"[^>]+src="([^"]+)"', page, 'thumbnail', fatal=False)),
            'title': self._html_search_regex('<h1>([^<]+)</h1>', page, 'title', fatal=False),
            'description': clean_html(get_element_by_class('small-12 medium-8 cell', page)),
        }
class CamFMEpisodeIE(InfoExtractor):
    """Extract a single Cam FM episode from its ``/player/<id>`` page.

    NOTE(review): this class was rebuilt from an excerpt in which statements
    were split across lines and some lines were not visible.  Every line
    tagged "reconstructed" below is an assumption based on the surrounding
    keys - confirm against the canonical file before merging.
    """

    _VALID_URL = r'https?://(?:www\.)?camfm\.co\.uk/player/(?P<id>[^/]+)'
    _TESTS = [{  # reconstructed wrapper
        'url': 'https://camfm.co.uk/player/43336',
        'skip': 'Episode will expire - don\'t actually know when, but it will go eventually',
        'info_dict': {  # reconstructed wrapper
            'id': '43336',  # reconstructed: the <id> group of the URL above
            'title': 'AITAA: Am I the Agony Aunt? - 19:00 Tue 16/05/2023',
            'ext': 'mp3',  # reconstructed: assumed audio extension - TODO confirm
            'upload_date': '20230516',
            'description': 'md5:f165144f94927c0f1bfa2ee6e6ab7bbf',
            'timestamp': 1684263600,
            'series': 'AITAA: Am I the Agony Aunt?',
            'thumbnail': 'md5:5980a831360d0744c3764551be3d09c1',
            'categories': ['Entertainment'],
        },
    }]

    def _real_extract(self, url):
        """Download the player page and build the episode's info dict.

        Formats come from the HTML5 media elements in the page.  The series
        name and categories are read from the ``caption`` element; the air
        date and description from the ``card-section`` element.  All of that
        metadata is best-effort: a missing element yields ``None`` (or an
        empty category list) instead of an exception.
        """
        episode_id = self._match_id(url)
        page = self._download_webpage(url, episode_id)
        audios = self._parse_html5_media_entries('https://audio.camfm.co.uk', page, episode_id)

        # get_element_by_class() returns None when the element is absent;
        # fall back to '' so the re.sub()/regex calls below do not raise
        # TypeError on optional metadata.
        caption = get_element_by_class('caption', page) or ''
        series = clean_html(re.sub(r'<span[^<]+<[^<]+>', '', caption)) or None

        card_section = get_element_by_class('card-section', page) or ''
        date = self._html_search_regex('>Aired at ([^<]+)<', card_section, 'air date', fatal=False)

        return {
            'id': episode_id,  # reconstructed: not visible in the excerpt
            'title': join_nonempty(series, date, delim=' - '),
            'formats': traverse_obj(audios, (..., 'formats', ...)),
            'timestamp': unified_timestamp(date),  # XXX: Does not account for UK's daylight savings
            'series': series,  # reconstructed: not visible in the excerpt
            'description': clean_html(re.sub(r'<b>[^<]+</b><br[^>]+/>', '', card_section)) or None,
            # The cover art is a CSS background url() holding a site-relative path.
            'thumbnail': urljoin('https://camfm.co.uk', self._search_regex(
                r'<div[^>]+class="cover-art"[^>]+style="[^"]+url\(\'([^\']+)',
                page, 'thumbnail', fatal=False)),
            'categories': get_elements_by_class('label', caption),
        }