3 from .common
import InfoExtractor
4 from ..networking
.exceptions
import HTTPError
13 from ..utils
.traversal
import traverse_obj
16 class DigitalConcertHallIE(InfoExtractor
):
17 IE_DESC
= 'DigitalConcertHall extractor'
18 _VALID_URL
= r
'https?://(?:www\.)?digitalconcerthall\.com/(?P<language>[a-z]+)/(?P<type>film|concert|work)/(?P<id>[0-9]+)-?(?P<part>[0-9]+)?'
19 _NETRC_MACHINE
= 'digitalconcerthall'
21 'note': 'Playlist with only one video',
22 'url': 'https://www.digitalconcerthall.com/en/concert/53201',
26 'composer': 'Kurt Weill',
27 'title': '[Magic Night]',
28 'thumbnail': r
're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
29 'upload_date': '20210624',
30 'timestamp': 1624548600,
32 'album_artists': ['Members of the Berliner Philharmoniker', 'Simon Rössler'],
33 'composers': ['Kurt Weill'],
35 'params': {'skip_download': 'm3u8'},
37 'note': 'Concert with several works and an interview',
38 'url': 'https://www.digitalconcerthall.com/en/concert/53785',
41 'album_artists': ['Berliner Philharmoniker', 'Kirill Petrenko'],
42 'title': 'Kirill Petrenko conducts Mendelssohn and Shostakovich',
43 'thumbnail': r
're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
45 'params': {'skip_download': 'm3u8'},
48 'url': 'https://www.digitalconcerthall.com/en/film/388',
52 'title': 'The Berliner Philharmoniker and Frank Peter Zimmermann',
53 'description': 'md5:cfe25a7044fa4be13743e5089b5b5eb2',
54 'thumbnail': r
're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
55 'upload_date': '20220714',
56 'timestamp': 1657785600,
57 'album_artists': ['Frank Peter Zimmermann', 'Benedikt von Bernstorff', 'Jakob von Bernstorff'],
59 'params': {'skip_download': 'm3u8'},
61 'note': 'Concert with several works and an interview',
62 'url': 'https://www.digitalconcerthall.com/en/work/53785-1',
65 'album_artists': ['Berliner Philharmoniker', 'Kirill Petrenko'],
66 'title': 'Kirill Petrenko conducts Mendelssohn and Shostakovich',
67 'thumbnail': r
're:^https?://images.digitalconcerthall.com/cms/thumbnails.*\.jpg$',
69 'params': {'skip_download': 'm3u8'},
72 _LOGIN_HINT
= ('Use --username token --password ACCESS_TOKEN where ACCESS_TOKEN '
73 'is the "access_token_production" from your browser local storage')
74 _REFRESH_HINT
= 'or else use a "refresh_token" with --username refresh --password REFRESH_TOKEN'
75 _OAUTH_URL
= 'https://api.digitalconcerthall.com/v2/oauth2/token'
76 _CLIENT_ID
= 'dch.webapp'
77 _CLIENT_SECRET
= '2ySLN+2Fwb'
78 _USER_AGENT
= 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15'
80 'Accept': 'application/json',
81 'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
82 'Origin': 'https://www.digitalconcerthall.com',
83 'Referer': 'https://www.digitalconcerthall.com/',
84 'User-Agent': _USER_AGENT
,
87 _access_token_expiry
= 0
91 def _access_token_is_expired(self
):
92 return self
._access
_token
_expiry
- 30 <= int(time
.time())
def _set_access_token(self, value):
    """Store *value* as the access token and update the recorded expiry.

    The expiry is read as the ``exp`` entry of ``jwt_decode_hs256(value)``,
    coerced to ``int``. When nothing can be extracted the expiry is set
    to 0; callers in this file pass ``None`` to clear the token.
    """
    self._access_token = value
    self._access_token_expiry = traverse_obj(value, ({jwt_decode_hs256}, 'exp', {int})) or 0
98 def _cache_tokens(self
, /):
99 self
.cache
.store(self
._NETRC
_MACHINE
, 'tokens', {
100 'access_token': self
._access
_token
,
101 'refresh_token': self
._refresh
_token
,
104 def _fetch_new_tokens(self
, invalidate
=False):
106 self
.report_warning('Access token has been invalidated')
107 self
._set
_access
_token
(None)
109 if not self
._access
_token
_is
_expired
:
112 if not self
._refresh
_token
:
113 self
._set
_access
_token
(None)
115 raise ExtractorError(
116 'Access token has expired or been invalidated. '
117 'Get a new "access_token_production" value from your browser '
118 f
'and try again, {self._REFRESH_HINT}', expected
=True)
120 # If we only have a refresh token, we need a temporary "initial token" for the refresh flow
121 bearer_token
= self
._access
_token
or self
._download
_json
(
122 self
._OAUTH
_URL
, None, 'Obtaining initial token', 'Unable to obtain initial token',
123 data
=urlencode_postdata({
125 'grant_type': 'device',
126 'device_vendor': 'unknown',
127 # device_model 'Safari' gets split streams of 4K/HEVC video and lossless/FLAC audio,
128 # but this is no longer effective since actual login is not possible anymore
129 'device_model': 'unknown',
130 'app_id': self
._CLIENT
_ID
,
131 'app_distributor': 'berlinphil',
132 'app_version': '1.95.0',
133 'client_secret': self
._CLIENT
_SECRET
,
134 }), headers
=self
._OAUTH
_HEADERS
)['access_token']
137 response
= self
._download
_json
(
138 self
._OAUTH
_URL
, None, 'Refreshing token', 'Unable to refresh token',
139 data
=urlencode_postdata({
140 'grant_type': 'refresh_token',
141 'refresh_token': self
._refresh
_token
,
142 'client_id': self
._CLIENT
_ID
,
143 'client_secret': self
._CLIENT
_SECRET
,
145 **self
._OAUTH
_HEADERS
,
146 'Authorization': f
'Bearer {bearer_token}',
148 except ExtractorError
as e
:
149 if isinstance(e
.cause
, HTTPError
) and e
.cause
.status
== 401:
150 self
._set
_access
_token
(None)
151 self
._refresh
_token
= None
153 raise ExtractorError('Your tokens have been invalidated', expected
=True)
156 self
._set
_access
_token
(response
['access_token'])
157 if refresh_token
:= traverse_obj(response
, ('refresh_token', {str}
)):
158 self
.write_debug('New refresh token granted')
159 self
._refresh
_token
= refresh_token
162 def _perform_login(self
, username
, password
):
165 if username
== 'refresh':
166 self
._refresh
_token
= password
167 self
._fetch
_new
_tokens
()
169 if username
== 'token':
170 if not traverse_obj(password
, {jwt_decode_hs256}
):
171 raise ExtractorError(
172 f
'The access token passed to yt-dlp is not valid. {self._LOGIN_HINT}', expected
=True)
173 self
._set
_access
_token
(password
)
176 if username
in ('refresh', 'token'):
177 if self
.get_param('cachedir') is not False:
178 token_type
= 'access' if username
== 'token' else 'refresh'
179 self
.to_screen(f
'Your {token_type} token has been cached to disk. To use the cached '
180 'token next time, pass --username cache along with any password')
183 if username
!= 'cache':
184 raise ExtractorError(
185 'Login with username and password is no longer supported '
186 f
'for this site. {self._LOGIN_HINT}, {self._REFRESH_HINT}', expected
=True)
188 # Try cached access_token
189 cached_tokens
= self
.cache
.load(self
._NETRC
_MACHINE
, 'tokens', default
={})
190 self
._set
_access
_token
(cached_tokens
.get('access_token'))
191 self
._refresh
_token
= cached_tokens
.get('refresh_token')
192 if not self
._access
_token
_is
_expired
:
195 # Try cached refresh_token
196 self
._fetch
_new
_tokens
(invalidate
=True)
198 def _real_initialize(self
):
199 if not self
._access
_token
:
200 self
.raise_login_required(
201 'All content on this site is only available for registered users. '
202 f
'{self._LOGIN_HINT}, {self._REFRESH_HINT}', method
=None)
204 def _entries(self
, items
, language
, type_
, **kwargs
):
206 video_id
= item
['id']
208 for should_retry
in (True, False):
209 self
._fetch
_new
_tokens
(invalidate
=not should_retry
)
211 stream_info
= self
._download
_json
(
212 self
._proto
_relative
_url
(item
['_links']['streams']['href']), video_id
, headers
={
213 'Accept': 'application/json',
214 'Authorization': f
'Bearer {self._access_token}',
215 'Accept-Language': language
,
216 'User-Agent': self
._USER
_AGENT
,
219 except ExtractorError
as error
:
220 if should_retry
and isinstance(error
.cause
, HTTPError
) and error
.cause
.status
== 401:
225 for m3u8_url
in traverse_obj(stream_info
, ('channel', ..., 'stream', ..., 'url', {url_or_none}
)):
226 formats
.extend(self
._extract
_m
3u8_formats
(m3u8_url
, video_id
, 'mp4', m3u8_id
='hls', fatal
=False))
228 if fmt
.get('format_note') and fmt
.get('vcodec') == 'none':
229 fmt
.update(parse_codecs(fmt
['format_note']))
233 'title': item
.get('title'),
234 'composer': item
.get('name_composer'),
236 'duration': item
.get('duration_total'),
237 'timestamp': traverse_obj(item
, ('date', 'published')),
238 'description': item
.get('short_description') or stream_info
.get('short_description'),
241 'start_time': chapter
.get('time'),
242 'end_time': try_get(chapter
, lambda x
: x
['time'] + x
['duration']),
243 'title': chapter
.get('text'),
244 } for chapter
in item
['cuepoints']] if item
.get('cuepoints') and type_
== 'concert' else None,
247 def _real_extract(self
, url
):
248 language
, type_
, video_id
, part
= self
._match
_valid
_url
(url
).group('language', 'type', 'id', 'part')
252 api_type
= 'concert' if type_
== 'work' else type_
253 vid_info
= self
._download
_json
(
254 f
'https://api.digitalconcerthall.com/v2/{api_type}/{video_id}', video_id
, headers
={
255 'Accept': 'application/json',
256 'Accept-Language': language
,
257 'User-Agent': self
._USER
_AGENT
,
259 videos
= [vid_info
] if type_
== 'film' else traverse_obj(vid_info
, ('_embedded', ..., ...))
262 videos
= [videos
[int(part
) - 1]]
264 album_artists
= traverse_obj(vid_info
, ('_links', 'artist', ..., 'name', {str}
))
265 thumbnail
= traverse_obj(vid_info
, (
266 'image', ..., {self
._proto
_relative
_url
}, {url_or_none}
,
267 {lambda x
: x
.format(width
=0, height
=0)}, any
)) # NB: 0x0 is the original size
272 'title': vid_info
.get('title'),
273 'entries': self
._entries
(
274 videos
, language
, type_
, thumbnail
=thumbnail
, album_artists
=album_artists
),
275 'thumbnail': thumbnail
,
276 'album_artists': album_artists
,