6 from .common
import InfoExtractor
16 from ..utils
.traversal
import traverse_obj
19 class RadikoBaseIE(InfoExtractor
):
22 _HOSTS_FOR_TIME_FREE_FFMPEG_UNSUPPORTED
= (
23 'https://c-rpaa.smartstream.ne.jp',
24 'https://si-c-radiko.smartstream.ne.jp',
25 'https://tf-f-rpaa-radiko.smartstream.ne.jp',
26 'https://tf-c-rpaa-radiko.smartstream.ne.jp',
27 'https://si-f-radiko.smartstream.ne.jp',
28 'https://rpaa.smartstream.ne.jp',
30 _HOSTS_FOR_TIME_FREE_FFMPEG_SUPPORTED
= (
31 'https://rd-wowza-radiko.radiko-cf.com',
33 'https://f-radiko.smartstream.ne.jp',
35 # Following URL forcibly connects not Time Free but Live
37 'https://c-radiko.smartstream.ne.jp',
40 def _negotiate_token(self
):
41 _
, auth1_handle
= self
._download
_webpage
_handle
(
42 'https://radiko.jp/v2/api/auth1', None, 'Downloading authentication page',
44 'x-radiko-app': 'pc_html5',
45 'x-radiko-app-version': '0.0.1',
46 'x-radiko-device': 'pc',
47 'x-radiko-user': 'dummy_user',
49 auth1_header
= auth1_handle
.headers
51 auth_token
= auth1_header
['X-Radiko-AuthToken']
52 kl
= int(auth1_header
['X-Radiko-KeyLength'])
53 ko
= int(auth1_header
['X-Radiko-KeyOffset'])
54 raw_partial_key
= self
._extract
_full
_key
()[ko
:ko
+ kl
]
55 partial_key
= base64
.b64encode(raw_partial_key
).decode()
57 area_id
= self
._download
_webpage
(
58 'https://radiko.jp/v2/api/auth2', None, 'Authenticating',
60 'x-radiko-device': 'pc',
61 'x-radiko-user': 'dummy_user',
62 'x-radiko-authtoken': auth_token
,
63 'x-radiko-partialkey': partial_key
,
67 self
.raise_geo_restricted(countries
=['JP'])
69 auth_data
= (auth_token
, area_id
)
70 self
.cache
.store('radiko', 'auth_data', auth_data
)
73 def _auth_client(self
):
74 cachedata
= self
.cache
.load('radiko', 'auth_data')
75 if cachedata
is not None:
76 response
= self
._download
_webpage
(
77 'https://radiko.jp/v2/api/auth_check', None, 'Checking cached token', expected_status
=401,
78 headers
={'X-Radiko-AuthToken': cachedata
[0], 'X-Radiko-AreaId': cachedata
[1]})
81 return self
._negotiate
_token
()
83 def _extract_full_key(self
):
87 jscode
= self
._download
_webpage
(
88 'https://radiko.jp/apps/js/playerCommon.js', None,
89 note
='Downloading player js code')
90 full_key
= self
._search
_regex
(
91 (r
"RadikoJSPlayer\([^,]*,\s*(['\"])pc_html5\
1,\s
*(['\"])(?P<fullkey>[0-9a-f]+)\2,\s*{"),
92 jscode, 'full key
', fatal=False, group='fullkey
')
95 full_key = full_key.encode()
96 else: # use only full key ever known
97 full_key = b'bcd151073c03b352e1ef2fd66c32209da9ca0afa
'
99 self._FULL_KEY = full_key
102 def _find_program(self, video_id, station, cursor):
103 station_program = self._download_xml(
104 f'https
://radiko
.jp
/v3
/program
/station
/weekly
/{station}
.xml
', video_id,
105 note=f'Downloading radio program
for {station} station
')
108 for p in station_program.findall('.//prog
'):
109 ft_str, to_str = p.attrib['ft
'], p.attrib['to
']
110 ft = unified_timestamp(ft_str, False)
111 to = unified_timestamp(to_str, False)
112 if ft <= cursor and cursor < to:
116 raise ExtractorError('Cannot identify radio program to download
!')
118 return prog, station_program, ft, ft_str, to_str
120 def _extract_formats(self, video_id, station, is_onair, ft, cursor, auth_token, area_id, query):
121 m3u8_playlist_data = self._download_xml(
122 f'https
://radiko
.jp
/v3
/station
/stream
/pc_html5
/{station}
.xml
', video_id,
123 note='Downloading stream information
')
128 timefree_int = 0 if is_onair else 1
130 for element in m3u8_playlist_data.findall(f'.//url
[@timefree="{timefree_int}"]/playlist_create_url
'):
135 playlist_url = update_url_query(pcu, {
136 'station_id
': station,
139 'lsid
': ''.join(random.choices('0123456789abcdef
', k=32)),
143 time_to_skip = None if is_onair else cursor - ft
145 domain = urllib.parse.urlparse(playlist_url).netloc
146 subformats = self._extract_m3u8_formats(
147 playlist_url, video_id, ext='m4a
',
148 live=True, fatal=False, m3u8_id=domain,
149 note=f'Downloading m3u8 information
from {domain}
',
151 'X
-Radiko
-AreaId
': area_id,
152 'X
-Radiko
-AuthToken
': auth_token,
154 for sf in subformats:
155 if (is_onair ^ pcu.startswith(self._HOSTS_FOR_LIVE)) or (
156 not is_onair and pcu.startswith(self._HOSTS_FOR_TIME_FREE_FFMPEG_UNSUPPORTED)):
157 sf['preference
'] = -100
158 sf['format_note
'] = 'not preferred
'
159 if not is_onair and timefree_int == 1 and time_to_skip:
160 sf['downloader_options
'] = {'ffmpeg_args
': ['-ss
', str(time_to_skip)]}
161 formats.extend(subformats)
165 def _extract_performers(self, prog):
166 return traverse_obj(prog, (
167 'pfm
/text()', ..., {lambda x: re.split(r'[//、
,,
]', x)}, ..., {str.strip})) or None
170 class RadikoIE(RadikoBaseIE):
171 _VALID_URL = r'https?
://(?
:www\
.)?radiko\
.jp
/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<timestring>\d+)'
174 # QRR (文化放送) station provides <desc>
175 'url': 'https://radiko.jp/#!/ts/QRR/20210425101300',
176 'only_matching': True,
178 # FMT (TOKYO FM) station does not provide <desc>
179 'url': 'https://radiko.jp/#!/ts/FMT/20210810150000',
180 'only_matching': True,
182 'url': 'https://radiko.jp/#!/ts/JOAK-FM/20210509090000',
183 'only_matching': True,
186 def _real_extract(self
, url
):
187 station
, timestring
= self
._match
_valid
_url
(url
).group('station', 'timestring')
188 video_id
= join_nonempty(station
, timestring
)
189 vid_int
= unified_timestamp(timestring
, False)
190 prog
, station_program
, ft
, radio_begin
, radio_end
= self
._find
_program
(video_id
, station
, vid_int
)
192 auth_token
, area_id
= self
._auth
_client
()
196 'title': try_call(lambda: prog
.find('title').text
),
197 'cast': self
._extract
_performers
(prog
),
198 'description': clean_html(try_call(lambda: prog
.find('info').text
)),
199 'uploader': try_call(lambda: station_program
.find('.//name').text
),
200 'uploader_id': station
,
201 'timestamp': vid_int
,
202 'duration': try_call(lambda: unified_timestamp(radio_end
, False) - unified_timestamp(radio_begin
, False)),
204 'formats': self
._extract
_formats
(
205 video_id
=video_id
, station
=station
, is_onair
=False,
206 ft
=ft
, cursor
=vid_int
, auth_token
=auth_token
, area_id
=area_id
,
208 'start_at': radio_begin
,
218 class RadikoRadioIE(RadikoBaseIE
):
219 _VALID_URL
= r
'https?://(?:www\.)?radiko\.jp/#!/live/(?P<id>[A-Z0-9-]+)'
222 # QRR (文化放送) station provides <desc>
223 'url': 'https://radiko.jp/#!/live/QRR',
224 'only_matching': True,
226 # FMT (TOKYO FM) station does not provide <desc>
227 'url': 'https://radiko.jp/#!/live/FMT',
228 'only_matching': True,
230 'url': 'https://radiko.jp/#!/live/JOAK-FM',
231 'only_matching': True,
234 def _real_extract(self
, url
):
235 station
= self
._match
_id
(url
)
236 self
.report_warning('Downloader will not stop at the end of the program! Press Ctrl+C to stop')
238 auth_token
, area_id
= self
._auth
_client
()
239 # get current time in JST (GMT+9:00 w/o DST)
240 vid_now
= time_seconds(hours
=9)
242 prog
, station_program
, ft
, _
, _
= self
._find
_program
(station
, station
, vid_now
)
244 title
= prog
.find('title').text
245 description
= clean_html(prog
.find('info').text
)
246 station_name
= station_program
.find('.//name').text
248 formats
= self
._extract
_formats
(
249 video_id
=station
, station
=station
, is_onair
=True,
250 ft
=ft
, cursor
=vid_now
, auth_token
=auth_token
, area_id
=area_id
,
256 'cast': self
._extract
_performers
(prog
),
257 'description': description
,
258 'uploader': station_name
,
259 'uploader_id': station
,