[ie/dropout] Fix extraction (#12102)
[yt-dlp.git] / yt_dlp / extractor / stageplus.py
blob639907269362108723712d67e39e9207019ffed1
1 import json
2 import uuid
4 from .common import InfoExtractor
5 from ..utils import (
6 float_or_none,
7 traverse_obj,
8 try_call,
9 unified_timestamp,
10 url_or_none,
14 class StagePlusVODConcertIE(InfoExtractor):
15 _NETRC_MACHINE = 'stageplus'
16 _VALID_URL = r'https?://(?:www\.)?stage-plus\.com/video/(?P<id>vod_concert_\w+)'
17 _TESTS = [{
18 'url': 'https://www.stage-plus.com/video/vod_concert_APNM8GRFDPHMASJKBSPJACG',
19 'playlist_count': 6,
20 'info_dict': {
21 'id': 'vod_concert_APNM8GRFDPHMASJKBSPJACG',
22 'title': 'Yuja Wang plays Rachmaninoff\'s Piano Concerto No. 2 – from Odeonsplatz',
23 'description': 'md5:50f78ec180518c9bdb876bac550996fc',
24 'artists': ['Yuja Wang', 'Lorenzo Viotti'],
25 'upload_date': '20230331',
26 'timestamp': 1680249600,
27 'release_date': '20210709',
28 'release_timestamp': 1625788800,
29 'thumbnails': 'count:3',
31 'playlist': [{
32 'info_dict': {
33 'id': 'performance_work_A1IN4PJFE9MM2RJ3CLBMUSJBBSOJAD9O',
34 'ext': 'mp4',
35 'title': 'Piano Concerto No. 2 in C Minor, Op. 18',
36 'description': 'md5:50f78ec180518c9bdb876bac550996fc',
37 'upload_date': '20230331',
38 'timestamp': 1680249600,
39 'release_date': '20210709',
40 'release_timestamp': 1625788800,
41 'duration': 2207,
42 'chapters': 'count:5',
43 'artists': ['Yuja Wang'],
44 'composers': ['Sergei Rachmaninoff'],
45 'album': 'Yuja Wang plays Rachmaninoff\'s Piano Concerto No. 2 – from Odeonsplatz',
46 'album_artists': ['Yuja Wang', 'Lorenzo Viotti'],
47 'track': 'Piano Concerto No. 2 in C Minor, Op. 18',
48 'track_number': 1,
49 'genre': 'Instrumental Concerto',
51 }],
52 'params': {'skip_download': 'm3u8'},
55 # TODO: Prune this after livestream and/or album extractors are added
56 _GRAPHQL_QUERY = '''query videoDetailPage($videoId: ID!, $sliderItemsFirst: Int = 24) {
57 node(id: $videoId) {
58 __typename
59 ...LiveConcertFields
60 ... on LiveConcert {
61 artists {
62 edges {
63 role {
64 ...RoleFields
66 node {
68 name
69 sortName
73 isAtmos
74 maxResolution
75 groups {
77 name
78 typeDisplayName
80 shortDescription
81 performanceWorks {
82 ...livePerformanceWorkFields
84 totalDuration
85 sliders {
86 ...contentContainerFields
88 vodConcert {
89 __typename
93 ...VideoFields
94 ... on Video {
95 artists {
96 edges {
97 role {
98 ...RoleFields
100 node {
102 name
103 sortName
107 isAtmos
108 maxResolution
109 isLossless
110 description
111 productionDate
112 takedownDate
113 sliders {
114 ...contentContainerFields
117 ...VodConcertFields
118 ... on VodConcert {
119 artists {
120 edges {
121 role {
122 ...RoleFields
124 node {
126 name
127 sortName
131 isAtmos
132 maxResolution
133 groups {
135 name
136 typeDisplayName
138 performanceWorks {
139 ...PerformanceWorkFields
141 shortDescription
142 productionDate
143 takedownDate
144 sliders {
145 ...contentContainerFields
151 fragment LiveConcertFields on LiveConcert {
152 endTime
154 pictures {
155 ...PictureFields
157 reruns {
158 ...liveConcertRerunFields
160 publicationLevel
161 startTime
162 streamStartTime
163 subtitle
164 title
165 typeDisplayName
166 stream {
167 ...liveStreamFields
169 trailerStream {
170 ...streamFields
172 geoAccessCountries
173 geoAccessMode
176 fragment PictureFields on Picture {
179 type
182 fragment liveConcertRerunFields on LiveConcertRerun {
183 streamStartTime
184 endTime
185 startTime
186 stream {
187 ...rerunStreamFields
191 fragment rerunStreamFields on RerunStream {
192 publicationLevel
193 streamType
197 fragment liveStreamFields on LiveStream {
198 publicationLevel
199 streamType
203 fragment streamFields on Stream {
204 publicationLevel
205 streamType
209 fragment RoleFields on Role {
210 __typename
212 type
213 displayName
216 fragment livePerformanceWorkFields on LivePerformanceWork {
217 __typename
219 artists {
220 ...artistWithRoleFields
222 groups {
223 edges {
224 node {
226 name
227 typeDisplayName
231 work {
232 ...workFields
236 fragment artistWithRoleFields on ArtistWithRoleConnection {
237 edges {
238 role {
239 ...RoleFields
241 node {
243 name
244 sortName
249 fragment workFields on Work {
251 title
252 movements {
254 title
256 composers {
258 name
260 genre {
262 title
266 fragment contentContainerFields on CuratedContentContainer {
267 __typename
268 ...SliderFields
269 ...BannerFields
272 fragment SliderFields on Slider {
274 headline
275 items(first: $sliderItemsFirst) {
276 edges {
277 node {
279 __typename
280 ...AlbumFields
281 ...ArtistFields
282 ...EpochFields
283 ...GenreFields
284 ...GroupFields
285 ...LiveConcertFields
286 ...PartnerFields
287 ...PerformanceWorkFields
288 ...VideoFields
289 ...VodConcertFields
295 fragment AlbumFields on Album {
296 artistAndGroupDisplayInfo
298 pictures {
299 ...PictureFields
301 title
304 fragment ArtistFields on Artist {
306 name
307 roles {
308 ...RoleFields
310 pictures {
311 ...PictureFields
315 fragment EpochFields on Epoch {
317 endYear
318 pictures {
319 ...PictureFields
321 startYear
322 title
325 fragment GenreFields on Genre {
327 pictures {
328 ...PictureFields
330 title
333 fragment GroupFields on Group {
335 name
336 typeDisplayName
337 pictures {
338 ...PictureFields
342 fragment PartnerFields on Partner {
344 name
345 typeDisplayName
346 subtypeDisplayName
347 pictures {
348 ...PictureFields
352 fragment PerformanceWorkFields on PerformanceWork {
353 __typename
355 artists {
356 ...artistWithRoleFields
358 groups {
359 edges {
360 node {
362 name
363 typeDisplayName
367 work {
368 ...workFields
370 stream {
371 ...streamFields
373 vodConcert {
374 __typename
377 duration
378 cuePoints {
379 mark
380 title
384 fragment VideoFields on Video {
386 archiveReleaseDate
387 title
388 subtitle
389 pictures {
390 ...PictureFields
392 stream {
393 ...streamFields
395 trailerStream {
396 ...streamFields
398 duration
399 typeDisplayName
400 duration
401 geoAccessCountries
402 geoAccessMode
403 publicationLevel
404 takedownDate
407 fragment VodConcertFields on VodConcert {
409 archiveReleaseDate
410 pictures {
411 ...PictureFields
413 subtitle
414 title
415 typeDisplayName
416 totalDuration
417 geoAccessCountries
418 geoAccessMode
419 trailerStream {
420 ...streamFields
422 publicationLevel
423 takedownDate
426 fragment BannerFields on Banner {
427 description
428 link
429 pictures {
430 ...PictureFields
432 title
433 }'''
435 _TOKEN = None
def _perform_login(self, username, password):
    """Request an access token with the given credentials and remember it.

    Sends a password-grant request to the token endpoint. ``self._TOKEN`` is
    only assigned when the response contains a truthy ``access_token``.
    """
    credentials = {
        'grant_type': 'password',
        'username': username,
        'password': password,
        'device_info': 'Chrome (Windows)',
        # A fresh random device id is generated for every login attempt
        'client_device_id': str(uuid.uuid4()),
    }
    request_headers = {
        'Content-Type': 'application/json',
        'Origin': 'https://www.stage-plus.com',
    }

    response = self._download_json(
        'https://audience.api.stageplus.io/oauth/token', None,
        headers=request_headers,
        data=json.dumps(credentials, separators=(',', ':')).encode(),
        note='Logging in')

    access_token = response.get('access_token')
    if access_token:
        self._TOKEN = access_token
def _real_initialize(self):
    """Make sure an access token is available before extraction starts.

    A token that is already set is kept. Otherwise the value of the
    ``dgplus_access_token`` cookie for www.stage-plus.com is used, and
    ``raise_login_required()`` is called when that yields nothing.
    """
    if not self._TOKEN:
        def read_cookie_token():
            # Evaluated inside try_call so a missing cookie does not propagate
            return self._get_cookies('https://www.stage-plus.com/')['dgplus_access_token'].value

        self._TOKEN = try_call(read_cookie_token)
        if not self._TOKEN:
            self.raise_login_required()
def _real_extract(self, url):
    """Extract a VOD concert as a playlist with one entry per performance work.

    The concert node is requested from the GraphQL endpoint using the bearer
    token in ``self._TOKEN``. Concert-level metadata is copied into every
    entry and then overridden by the fields found on the individual work.

    Returns the result of ``self.playlist_result`` for the concert.
    """
    concert_id = self._match_id(url)

    data = self._download_json('https://audience.api.stageplus.io/graphql', concert_id, headers={
        'authorization': f'Bearer {self._TOKEN}',
        'content-type': 'application/json',
        'Origin': 'https://www.stage-plus.com',
    }, data=json.dumps({
        'query': self._GRAPHQL_QUERY,
        'variables': {'videoId': concert_id},
        'operationName': 'videoDetailPage',
    }, separators=(',', ':')).encode())['data']['node']

    # Concert-level fields shared by the playlist and all of its entries
    metadata = traverse_obj(data, {
        'title': 'title',
        'description': ('shortDescription', {str}),
        'artists': ('artists', 'edges', ..., 'node', 'name'),
        'timestamp': ('archiveReleaseDate', {unified_timestamp}),
        'release_timestamp': ('productionDate', {unified_timestamp}),
    })

    # Only pictures with a valid URL are kept; an empty result becomes None
    thumbnails = traverse_obj(data, ('pictures', lambda _, v: url_or_none(v['url']), {
        'id': 'name',
        'url': 'url',
    })) or None

    entries = []
    # Works without an id or without a valid stream URL are skipped;
    # track numbers count the remaining works starting at 1
    for idx, video in enumerate(traverse_obj(data, (
            'performanceWorks', lambda _, v: v['id'] and url_or_none(v['stream']['url']))), 1):
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            video['stream']['url'], video['id'], 'mp4', m3u8_id='hls', query={'token': self._TOKEN})
        entries.append({
            'id': video['id'],
            'formats': formats,
            'subtitles': subtitles,
            'album': metadata.get('title'),
            # The concert performers are stored under 'artists' in metadata;
            # looking up 'artist' here always produced None
            'album_artists': metadata.get('artists'),
            'track_number': idx,
            **metadata,
            # Work-level values take precedence over the concert-level ones
            **traverse_obj(video, {
                'title': ('work', 'title'),
                'track': ('work', 'title'),
                'duration': ('duration', {float_or_none}),
                'chapters': (
                    # Cue points whose mark is not numeric are dropped
                    'cuePoints', lambda _, v: float_or_none(v['mark']) is not None, {
                        'title': 'title',
                        'start_time': ('mark', {float_or_none}),
                    }),
                'artists': ('artists', 'edges', ..., 'node', 'name'),
                'composers': ('work', 'composers', ..., 'name'),
                'genre': ('work', 'genre', 'title'),
            }),
        })

    return self.playlist_result(entries, concert_id, thumbnails=thumbnails, **metadata)