import urllib.parse

from .common import InfoExtractor
from ..utils import (
    OnDemandPagedList,
    determine_ext,
    parse_iso8601,
    traverse_obj,
)
class TuneInBaseIE(InfoExtractor):
    """Helpers shared by the tunein.com extractors in this file."""

    _VALID_URL_BASE = r'https?://(?:www\.)?tunein\.com'

    def _extract_metadata(self, webpage, content_id):
        """Return the JSON object that follows ``window.INITIAL_STATE=`` in *webpage*.

        The search is non-fatal. A falsy *webpage* is answered with an empty
        dict without searching: a caller in this file downloads the page with
        ``fatal=False``, so a non-string value may be handed over here and
        must not reach the regex search.
        """
        if not webpage:
            return {}
        return self._search_json(
            r'window.INITIAL_STATE=', webpage, 'hydration', content_id, fatal=False)

    def _extract_formats_and_subtitles(self, content_id):
        """Collect formats and subtitles for *content_id*.

        Each stream entry returned by the ``Tune.ashx`` endpoint is handled by
        kind: ``hls`` streams are expanded through the m3u8 helper, ``.pls``
        playlists are downloaded and resolved to their ``File1`` entry, and
        every other stream URL is used as-is.

        Returns a ``(formats, subtitles)`` tuple.
        """
        # NOTE(review): the second positional argument and the ['body'] lookup
        # were restored while repairing a damaged copy of this file -- confirm.
        streams = self._download_json(
            f'https://opml.radiotime.com/Tune.ashx?render=json&formats=mp3,aac,ogg,flash,hls&id={content_id}',
            content_id)['body']

        formats, subtitles = [], {}
        for stream in streams:
            if stream.get('media_type') == 'hls':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(stream['url'], content_id, fatal=False)
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)
                continue

            stream_url = stream['url']
            if determine_ext(stream_url) == 'pls':
                playlist_content = self._download_webpage(stream_url, content_id)
                # `.` also matches a trailing carriage return, so strip the
                # capture; the lookup is non-fatal and may find nothing.
                stream_url = (self._search_regex(
                    r'File1=(.*)', playlist_content, 'url', fatal=False) or '').strip()
                if not stream_url:
                    # Skip instead of emitting a format without a usable URL
                    continue

            formats.append({
                'url': stream_url,
                'abr': stream.get('bitrate'),
                'ext': stream.get('media_type'),
            })

        return formats, subtitles
class TuneInStationIE(TuneInBaseIE):
    """Extractor for tunein.com station pages and embedded station players (``s<digits>`` ids)."""

    _VALID_URL = TuneInBaseIE._VALID_URL_BASE + r'(?:/radio/[^?#]+-|/embed/player/)(?P<id>s\d+)'
    _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>(?:https?://)?tunein\.com/embed/player/s\d+)']

    # NOTE(review): the 'id' and 'ext' expectations below were restored while
    # repairing a damaged copy of this file -- confirm against a test run.
    _TESTS = [{
        'url': 'https://tunein.com/radio/Jazz24-885-s34682/',
        'info_dict': {
            'id': 's34682',
            'title': 're:^Jazz24',
            'description': 'md5:d6d0b89063fd68d529fa7058ee98619b',
            'thumbnail': 're:^https?://[^?&]+/s34682',
            'location': 'Seattle-Tacoma, US',
            'ext': 'mp3',
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://tunein.com/embed/player/s6404/',
        'only_matching': True,
    }, {
        'url': 'https://tunein.com/radio/BBC-Radio-1-988-s24939/',
        'info_dict': {
            'id': 's24939',
            'title': 're:^BBC Radio 1',
            'description': 'md5:f3f75f7423398d87119043c26e7bfb84',
            'thumbnail': 're:^https?://[^?&]+/s24939',
            'location': 'London, UK',
            'ext': 'mp3',
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for the station addressed by *url*.

        Descriptive fields are read from the page's hydration JSON under
        ``profiles -> <station id>``; formats and subtitles come from
        ``_extract_formats_and_subtitles``.
        """
        station_id = self._match_id(url)

        webpage = self._download_webpage(url, station_id)
        metadata = self._extract_metadata(webpage, station_id)

        formats, subtitles = self._extract_formats_and_subtitles(station_id)
        return {
            'id': station_id,
            'title': traverse_obj(metadata, ('profiles', station_id, 'title')),
            'description': traverse_obj(metadata, ('profiles', station_id, 'description')),
            'thumbnail': traverse_obj(metadata, ('profiles', station_id, 'image')),
            'timestamp': parse_iso8601(
                traverse_obj(metadata, ('profiles', station_id, 'actions', 'play', 'publishTime'))),
            # Two alternative locations of the display name are tried
            'location': traverse_obj(
                metadata, ('profiles', station_id, 'metadata', 'properties', 'location', 'displayName'),
                ('profiles', station_id, 'properties', 'location', 'displayName')),
            'formats': formats,
            'subtitles': subtitles,
            'is_live': traverse_obj(metadata, ('profiles', station_id, 'actions', 'play', 'isLive')),
        }
class TuneInPodcastIE(TuneInBaseIE):
    """Playlist extractor for tunein.com podcast pages and embedded podcast players (``p<digits>`` ids)."""

    _VALID_URL = TuneInBaseIE._VALID_URL_BASE + r'/(?:podcasts/[^?#]+-|embed/player/)(?P<id>p\d+)/?(?:#|$)'
    _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>(?:https?://)?tunein\.com/embed/player/p\d+)']

    # NOTE(review): the 'id' entries and the second 'title' were restored while
    # repairing a damaged copy of this file -- confirm against a test run.
    _TESTS = [{
        'url': 'https://tunein.com/podcasts/Technology-Podcasts/Artificial-Intelligence-p1153019',
        'info_dict': {
            'id': 'p1153019',
            'title': 'Lex Fridman Podcast',
            'description': 'md5:bedc4e5f1c94f7dec6e4317b5654b00d',
        },
        'playlist_mincount': 200,
    }, {
        'url': 'https://tunein.com/embed/player/p191660/',
        'only_matching': True,
    }, {
        'url': 'https://tunein.com/podcasts/World-News/BBC-News-p14/',
        'info_dict': {
            'id': 'p14',
            'title': 'BBC News',
            'description': 'md5:1218e575eeaff75f48ed978261fa2068',
        },
        'playlist_mincount': 200,
    }]

    # Number of episodes requested per API page.
    # NOTE(review): the value was restored during repair of a damaged copy -- confirm.
    _PAGE_SIZE = 30

    def _real_extract(self, url):
        """Return a lazily paged playlist of the podcast's episodes.

        Title and description are taken from the page's hydration JSON; the
        page download is non-fatal, so both may be missing. Episodes are
        fetched page by page from the ``profiles/<id>/contents`` API and
        delegated to ``TuneInPodcastEpisodeIE``.
        """
        podcast_id = self._match_id(url)

        webpage = self._download_webpage(url, podcast_id, fatal=False)
        metadata = self._extract_metadata(webpage, podcast_id)

        def page_func(page_num):
            api_response = self._download_json(
                f'https://api.tunein.com/profiles/{podcast_id}/contents', podcast_id,
                note=f'Downloading page {page_num + 1}', query={
                    # NOTE(review): the 'filter' entry was restored during
                    # repair of a damaged copy -- confirm it is still wanted.
                    'filter': 't:free',
                    'offset': page_num * self._PAGE_SIZE,
                    'limit': self._PAGE_SIZE,
                })

            return [
                self.url_result(
                    # The first character of GuideId is dropped to form topicId
                    f'https://tunein.com/podcasts/{podcast_id}?topicId={episode["GuideId"][1:]}',
                    TuneInPodcastEpisodeIE, title=episode.get('Title'))
                for episode in api_response['Items']]

        entries = OnDemandPagedList(page_func, self._PAGE_SIZE)
        return self.playlist_result(
            entries, playlist_id=podcast_id, title=traverse_obj(metadata, ('profiles', podcast_id, 'title')),
            description=traverse_obj(metadata, ('profiles', podcast_id, 'description')))
class TuneInPodcastEpisodeIE(TuneInBaseIE):
    """Extractor for a single podcast episode, addressed by a podcast URL with a ``topicId`` query."""

    _VALID_URL = TuneInBaseIE._VALID_URL_BASE + r'/podcasts/(?:[^?&]+-)?(?P<podcast_id>p\d+)/?\?topicId=(?P<id>\w\d+)'

    # NOTE(review): the 'id' and 'ext' expectations were restored while
    # repairing a damaged copy of this file -- confirm against a test run.
    _TESTS = [{
        'url': 'https://tunein.com/podcasts/Technology-Podcasts/Artificial-Intelligence-p1153019/?topicId=236404354',
        'info_dict': {
            'id': 't236404354',
            'title': '#351 \u2013 MrBeast: Future of YouTube, Twitter, TikTok, and Instagram',
            'description': 'md5:e1734db6f525e472c0c290d124a2ad77',
            'thumbnail': 're:^https?://[^?&]+/p1153019',
            'timestamp': 1673458571,
            'upload_date': '20230111',
            'series_id': 'p1153019',
            'series': 'Lex Fridman Podcast',
            'ext': 'mp3',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for the episode addressed by *url*.

        The ``topicId`` value is prefixed with ``t`` to form the episode id
        used for both the hydration-JSON lookup and the stream query. The
        series title is read from the podcast's own profile entry.
        """
        podcast_id, episode_id = self._match_valid_url(url).group('podcast_id', 'id')
        episode_id = f't{episode_id}'

        webpage = self._download_webpage(url, episode_id)
        metadata = self._extract_metadata(webpage, episode_id)

        formats, subtitles = self._extract_formats_and_subtitles(episode_id)
        return {
            'id': episode_id,
            'title': traverse_obj(metadata, ('profiles', episode_id, 'title')),
            'description': traverse_obj(metadata, ('profiles', episode_id, 'description')),
            'thumbnail': traverse_obj(metadata, ('profiles', episode_id, 'image')),
            'timestamp': parse_iso8601(
                traverse_obj(metadata, ('profiles', episode_id, 'actions', 'play', 'publishTime'))),
            'series_id': podcast_id,
            'series': traverse_obj(metadata, ('profiles', podcast_id, 'title')),
            'formats': formats,
            'subtitles': subtitles,
        }
class TuneInShortenerIE(InfoExtractor):
    """Follows ``tun.in`` short links and delegates the target URL to another extractor."""

    IE_NAME = 'tunein:shortener'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://tun\.in/(?P<id>[A-Za-z0-9]+)'

    # NOTE(review): the 'id' and 'ext' expectations were restored while
    # repairing a damaged copy of this file -- confirm against a test run.
    _TESTS = [{
        'url': 'http://tun.in/ser7s',
        'info_dict': {
            'id': 's34682',
            'title': 're:^Jazz24',
            'description': 'md5:d6d0b89063fd68d529fa7058ee98619b',
            'thumbnail': 're:^https?://[^?&]+/s34682',
            'location': 'Seattle-Tacoma, US',
            'ext': 'mp3',
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': True,  # live stream
        },
    }]

    def _real_extract(self, url):
        """Resolve the short link and return a ``url_result`` for its target.

        When the resolved URL carries an explicit port 443, the port (and
        anything else in the netloc besides the hostname) is removed before
        the URL is passed on.
        """
        redirect_id = self._match_id(url)
        # The server doesn't support HEAD requests
        urlh = self._request_webpage(
            url, redirect_id, note='Downloading redirect page')

        # NOTE(review): reading the final URL off the response handle was
        # restored during repair of a damaged copy (urlh was otherwise unused)
        # -- confirm the attribute name against the request API in use.
        url = urlh.url
        url_parsed = urllib.parse.urlparse(url)
        if url_parsed.port == 443:
            # ParseResult exposes geturl(), not a ``url`` attribute; the
            # previous ``.url`` access raised AttributeError on this path.
            url = url_parsed._replace(netloc=url_parsed.hostname).geturl()

        self.to_screen(f'Following redirect: {url}')
        return self.url_result(url)