6 from .common
import InfoExtractor
15 from ..utils
.traversal
import traverse_obj
# Base class shared by the Radiko extractors in this file. It subclasses
# InfoExtractor and holds the stream-host tables used by _extract_formats,
# together with the authentication and programme-lookup helpers.
# NOTE(review): this text appears extraction-damaged. Statements are split
# across lines and some original lines (for example the closing parentheses of
# the tuples below) are not visible. Restore the pristine file before changing code.
18 class RadikoBaseIE(InfoExtractor
):
# URL prefixes of stream hosts. In _extract_formats, formats whose playlist URL
# starts with one of these get preference -100 when the request is not on-air.
21 _HOSTS_FOR_TIME_FREE_FFMPEG_UNSUPPORTED
= (
22 'https://c-rpaa.smartstream.ne.jp',
23 'https://si-c-radiko.smartstream.ne.jp',
24 'https://tf-f-rpaa-radiko.smartstream.ne.jp',
25 'https://tf-c-rpaa-radiko.smartstream.ne.jp',
26 'https://si-f-radiko.smartstream.ne.jp',
27 'https://rpaa.smartstream.ne.jp',
# Counterpart host table. No code visible in this section reads it;
# presumably informational (TODO confirm).
29 _HOSTS_FOR_TIME_FREE_FFMPEG_SUPPORTED
= (
30 'https://rd-wowza-radiko.radiko-cf.com',
32 'https://f-radiko.smartstream.ne.jp',
# NOTE(review): the line that binds the following host to a name is not
# visible. _extract_formats reads self._HOSTS_FOR_LIVE, so this entry
# presumably belongs to that tuple (TODO confirm).
34 # Following URL forcibly connects not Time Free but Live
36 'https://c-radiko.smartstream.ne.jp',
# Negotiate a fresh auth token with radiko.jp and cache it.
# Steps visible here: request /v2/api/auth1, read the token, key length and key
# offset from the response headers, slice that window out of the full key,
# base64-encode it, send it with the token to /v2/api/auth2, and store
# (auth_token, area_id) in the cache under ('radiko', 'auth_data').
# NOTE(review): the `headers=` openers, the condition guarding
# raise_geo_restricted and the return statement are not visible in this text.
# Callers unpack two values from the result, so this presumably returns
# auth_data (TODO confirm).
39 def _negotiate_token(self
):
# Only the response handle is kept; the page body is discarded into `_`.
40 _
, auth1_handle
= self
._download
_webpage
_handle
(
41 'https://radiko.jp/v2/api/auth1', None, 'Downloading authentication page',
43 'x-radiko-app': 'pc_html5',
44 'x-radiko-app-version': '0.0.1',
45 'x-radiko-device': 'pc',
46 'x-radiko-user': 'dummy_user',
48 auth1_header
= auth1_handle
.headers
50 auth_token
= auth1_header
['X-Radiko-AuthToken']
# Key length and key offset arrive as header strings; convert them for slicing.
51 kl
= int(auth1_header
['X-Radiko-KeyLength'])
52 ko
= int(auth1_header
['X-Radiko-KeyOffset'])
# Take the [ko:ko + kl] window of the full key and base64-encode it.
53 raw_partial_key
= self
._extract
_full
_key
()[ko
:ko
+ kl
]
54 partial_key
= base64
.b64encode(raw_partial_key
).decode()
# Second step: present the token and the partial key to auth2.
56 area_id
= self
._download
_webpage
(
57 'https://radiko.jp/v2/api/auth2', None, 'Authenticating',
59 'x-radiko-device': 'pc',
60 'x-radiko-user': 'dummy_user',
61 'x-radiko-authtoken': auth_token
,
62 'x-radiko-partialkey': partial_key
,
# Report the content as geo-restricted to Japan (guarding condition not visible here).
66 self
.raise_geo_restricted(countries
=['JP'])
# Cache the pair under the same key that _auth_client loads.
68 auth_data
= (auth_token
, area_id
)
69 self
.cache
.store('radiko', 'auth_data', auth_data
)
# Return auth data, trying a cached token before negotiating a new one.
# Loads ('radiko', 'auth_data') from the cache and, when an entry exists,
# queries /v2/api/auth_check with the cached token and area id. The final
# statement falls back to _negotiate_token().
# NOTE(review): the test applied to `response` (and any early return of the
# cached data) is not visible in this text (TODO confirm).
72 def _auth_client(self
):
73 cachedata
= self
.cache
.load('radiko', 'auth_data')
74 if cachedata
is not None:
# expected_status=401 is passed to the download helper; presumably so that a
# rejected token does not abort extraction here (TODO confirm).
75 response
= self
._download
_webpage
(
76 'https://radiko.jp/v2/api/auth_check', None, 'Checking cached token', expected_status
=401,
77 headers
={'X-Radiko-AuthToken': cachedata
[0], 'X-Radiko-AreaId': cachedata
[1]})
80 return self
._negotiate
_token
()
# Obtain the full player key as bytes and remember it on self._FULL_KEY.
# The key is searched for in playerCommon.js as the hex string argument that
# follows 'pc_html5' in a RadikoJSPlayer(...) call. The search is non-fatal,
# and the else branch substitutes a hard-coded key.
# NOTE(review): the `if` that pairs with the `else` below and the return
# statement are not visible in this text. _negotiate_token slices the result
# of this method, so the key is presumably returned (TODO confirm).
82 def _extract_full_key(self
):
86 jscode
= self
._download
_webpage
(
87 'https://radiko.jp/apps/js/playerCommon.js', None,
88 note
='Downloading player js code')
89 full_key
= self
._search
_regex
(
90 (r
"RadikoJSPlayer\([^,]*,\s*(['\"])pc_html5\
1,\s
*(['\"])(?P<fullkey>[0-9a-f]+)\2,\s*{"),
91 jscode, 'full key
', fatal=False, group='fullkey
')
# The matched key is text; it is converted to bytes here.
94 full_key = full_key.encode()
95 else: # use only full key ever known
96 full_key = b'bcd151073c03b352e1ef2fd66c32209da9ca0afa
'
98 self._FULL_KEY = full_key
# Locate the programme of `station` whose time interval contains `cursor`.
# Downloads the station's weekly programme XML, reads each <prog> element's
# 'ft' and 'to' attributes, converts both with unified_timestamp and tests
# ft <= cursor < to.
# Returns the tuple (prog, station_program, ft, ft_str, to_str).
# Raises ExtractorError with 'Cannot identify radio program to download!'.
# NOTE(review): the lines between the match test and the raise are not
# visible, so how `prog` is bound and exactly when the error is raised cannot
# be confirmed from this text.
101 def _find_program(self, video_id, station, cursor):
102 station_program = self._download_xml(
103 'https
://radiko
.jp
/v3
/program
/station
/weekly
/%s.xml
' % station, video_id,
104 note='Downloading radio program
for %s station
' % station)
# Scan every programme entry in the weekly schedule.
107 for p in station_program.findall('.//prog
'):
108 ft_str, to_str = p.attrib['ft
'], p.attrib['to
']
109 ft = unified_timestamp(ft_str, False)
110 to = unified_timestamp(to_str, False)
# Half-open interval: the start time is included, the end time is excluded.
111 if ft <= cursor and cursor < to:
115 raise ExtractorError('Cannot identify radio program to download
!')
117 return prog, station_program, ft, ft_str, to_str
# Build the format list for a station stream, either live or time-free.
# Downloads the station's stream-information XML and selects the
# playlist_create_url elements under url nodes whose `timefree` attribute is 0
# for on-air requests and 1 otherwise. For each one it builds a playlist URL
# with update_url_query (adding at least 'station_id' and a random 32-character
# hex 'lsid'), extracts m3u8 formats non-fatally, and adjusts each sub-format.
# NOTE(review): the binding of `pcu`, the initialisation of `formats`, the
# remaining query/header entries and the return statement are not visible in
# this text (TODO confirm against the pristine file).
119 def _extract_formats(self, video_id, station, is_onair, ft, cursor, auth_token, area_id, query):
120 m3u8_playlist_data = self._download_xml(
121 f'https
://radiko
.jp
/v3
/station
/stream
/pc_html5
/{station}
.xml
', video_id,
122 note='Downloading stream information
')
# Value compared against the XML `timefree` attribute: 0 for live, 1 for time-free.
127 timefree_int = 0 if is_onair else 1
129 for element in m3u8_playlist_data.findall(f'.//url
[@timefree="{timefree_int}"]/playlist_create_url
'):
134 playlist_url = update_url_query(pcu, {
135 'station_id
': station,
138 'lsid
': ''.join(random.choices('0123456789abcdef
', k=32)),
# Offset of the requested cursor from the programme start; unused for live.
142 time_to_skip = None if is_onair else cursor - ft
# The host part of the playlist URL doubles as the m3u8 format id.
144 domain = urllib.parse.urlparse(playlist_url).netloc
145 subformats = self._extract_m3u8_formats(
146 playlist_url, video_id, ext='m4a
',
147 live=True, fatal=False, m3u8_id=domain,
148 note=f'Downloading m3u8 information
from {domain}
',
150 'X
-Radiko
-AreaId
': area_id,
151 'X
-Radiko
-AuthToken
': auth_token,
153 for sf in subformats:
# Demote the format when exactly one of "request is live" and "host is a live
# host" holds, or when a non-live request uses an ffmpeg-unsupported host.
154 if (is_onair ^ pcu.startswith(self._HOSTS_FOR_LIVE)) or (
155 not is_onair and pcu.startswith(self._HOSTS_FOR_TIME_FREE_FFMPEG_UNSUPPORTED)):
156 sf['preference
'] = -100
157 sf['format_note
'] = 'not preferred
'
# For time-free requests with a non-zero offset, pass -ss <offset> to ffmpeg.
158 if not is_onair and timefree_int == 1 and time_to_skip:
159 sf['downloader_options
'] = {'ffmpeg_args
': ['-ss
', str(time_to_skip)]}
160 formats.extend(subformats)
# Collect performer names from a programme element.
# Traverses `prog` along 'pfm/text()', splits each text value with re.split on
# the separator character class below, and strips every piece with str.strip.
# Returns None when traverse_obj yields a falsy result.
# NOTE(review): the separator character class is split across lines in this
# text, so whitespace inside it may have been lost; verify the exact characters
# against the pristine file.
164 def _extract_performers(self, prog):
165 return traverse_obj(prog, (
166 'pfm
/text()', ..., {lambda x: re.split(r'[//、
,,
]', x)}, ..., {str.strip})) or None
# Extractor for time-shifted programme URLs of the form
# radiko.jp/#!/ts/<station>/<digits>; the pattern captures `station` and `id`.
# The entries below are match-only test URLs.
# NOTE(review): the `_TESTS` opener and the dict braces around each entry are
# not visible in this text.
169 class RadikoIE(RadikoBaseIE):
170 _VALID_URL = r'https?
://(?
:www\
.)?radiko\
.jp
/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<id>\d+)'
173 # QRR (文化放送) station provides <desc>
174 'url': 'https://radiko.jp/#!/ts/QRR/20210425101300',
175 'only_matching': True,
177 # FMT (TOKYO FM) station does not provide <desc>
178 'url': 'https://radiko.jp/#!/ts/FMT/20210810150000',
179 'only_matching': True,
181 'url': 'https://radiko.jp/#!/ts/JOAK-FM/20210509090000',
182 'only_matching': True,
# Extract a time-shifted programme.
# Reads `station` and `id` from the URL, converts the id with unified_timestamp,
# finds the matching programme via _find_program, authenticates via
# _auth_client, and assembles the info fields listed below. Formats come from
# _extract_formats with is_onair=False.
# NOTE(review): the dict opener / return statement and the opener of the query
# that contains 'start_at' are not visible in this text (TODO confirm).
185 def _real_extract(self
, url
):
186 station
, video_id
= self
._match
_valid
_url
(url
).groups()
# The numeric id from the URL is converted once and reused as the lookup
# cursor and as the reported timestamp.
187 vid_int
= unified_timestamp(video_id
, False)
188 prog
, station_program
, ft
, radio_begin
, radio_end
= self
._find
_program
(video_id
, station
, vid_int
)
190 auth_token
, area_id
= self
._auth
_client
()
# Text fields are read through try_call; presumably this tolerates missing XML
# elements (TODO confirm).
194 'title': try_call(lambda: prog
.find('title').text
),
195 'cast': self
._extract
_performers
(prog
),
196 'description': clean_html(try_call(lambda: prog
.find('info').text
)),
197 'uploader': try_call(lambda: station_program
.find('.//name').text
),
198 'uploader_id': station
,
199 'timestamp': vid_int
,
# Duration is the difference between the programme's end and start timestamps.
200 'duration': try_call(lambda: unified_timestamp(radio_end
, False) - unified_timestamp(radio_begin
, False)),
202 'formats': self
._extract
_formats
(
203 video_id
=video_id
, station
=station
, is_onair
=False,
204 ft
=ft
, cursor
=vid_int
, auth_token
=auth_token
, area_id
=area_id
,
206 'start_at': radio_begin
,
# Extractor for live-station URLs of the form radiko.jp/#!/live/<station>;
# the pattern captures the station as `id`. The entries below are match-only
# test URLs.
# NOTE(review): the `_TESTS` opener and the dict braces around each entry are
# not visible in this text.
216 class RadikoRadioIE(RadikoBaseIE
):
217 _VALID_URL
= r
'https?://(?:www\.)?radiko\.jp/#!/live/(?P<id>[A-Z0-9-]+)'
220 # QRR (文化放送) station provides <desc>
221 'url': 'https://radiko.jp/#!/live/QRR',
222 'only_matching': True,
224 # FMT (TOKYO FM) station does not provide <desc>
225 'url': 'https://radiko.jp/#!/live/FMT',
226 'only_matching': True,
228 'url': 'https://radiko.jp/#!/live/JOAK-FM',
229 'only_matching': True,
232 def _real_extract(self
, url
):
233 station
= self
._match
_id
(url
)
234 self
.report_warning('Downloader will not stop at the end of the program! Press Ctrl+C to stop')
236 auth_token
, area_id
= self
._auth
_client
()
237 # get current time in JST (GMT+9:00 w/o DST)
238 vid_now
= time_seconds(hours
=9)
240 prog
, station_program
, ft
, _
, _
= self
._find
_program
(station
, station
, vid_now
)
242 title
= prog
.find('title').text
243 description
= clean_html(prog
.find('info').text
)
244 station_name
= station_program
.find('.//name').text
246 formats
= self
._extract
_formats
(
247 video_id
=station
, station
=station
, is_onair
=True,
248 ft
=ft
, cursor
=vid_now
, auth_token
=auth_token
, area_id
=area_id
,
254 'cast': self
._extract
_performers
(prog
),
255 'description': description
,
256 'uploader': station_name
,
257 'uploader_id': station
,