Release 2024.12.23
[yt-dlp.git] / yt_dlp / extractor / medici.py
blob b6235b64df35b4d3e6287ad565d9dc212d4b83e2
1 import urllib.parse
3 from .common import InfoExtractor
4 from ..utils import (
5 filter_dict,
6 parse_iso8601,
7 traverse_obj,
8 try_call,
9 url_or_none,
class MediciIE(InfoExtractor):
    """Extractor for video pages on medici.tv and its edu.medici.tv subdomain."""

    # 'sub' captures the optional www/edu subdomain; 'id' is the page slug
    # that follows the two-letter language code and the category segment.
    _VALID_URL = r'https?://(?:(?P<sub>www|edu)\.)?medici\.tv/[a-z]{2}/[\w.-]+/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.medici.tv/en/operas/thomas-ades-the-exterminating-angel-calixto-bieito-opera-bastille-paris',
        'md5': 'd483f74e7a7a9eac0dbe152ab189050d',
        'info_dict': {
            'id': '8032',
            'ext': 'mp4',
            'title': 'Thomas Adès\'s The Exterminating Angel',
            'description': 'md5:708ae6350dadc604225b4a6e32482bab',
            'thumbnail': r're:https://.+/.+\.jpg',
            'upload_date': '20240304',
            'timestamp': 1709561766,
            'display_id': 'thomas-ades-the-exterminating-angel-calixto-bieito-opera-bastille-paris',
        },
        'expected_warnings': [r'preview'],
    }, {
        'url': 'https://edu.medici.tv/en/operas/wagner-lohengrin-paris-opera-kirill-serebrennikov-piotr-beczala-kwangchul-youn-johanni-van-oostrum',
        'md5': '4ef3f4079a6e1c617584463a9eb84f99',
        'info_dict': {
            'id': '7900',
            'ext': 'mp4',
            'title': 'Wagner\'s Lohengrin',
            'description': 'md5:a384a62937866101f86902f21752cd89',
            'thumbnail': r're:https://.+/.+\.jpg',
            'upload_date': '20231017',
            'timestamp': 1697554771,
            'display_id': 'wagner-lohengrin-paris-opera-kirill-serebrennikov-piotr-beczala-kwangchul-youn-johanni-van-oostrum',
        },
        'expected_warnings': [r'preview'],
    }, {
        'url': 'https://www.medici.tv/en/concerts/sergey-smbatyan-conducts-mansurian-chouchane-siranossian-mario-brunello',
        'md5': '9dd757e53b22b2511e85ea9ea60e4815',
        'info_dict': {
            'id': '5712',
            'ext': 'mp4',
            'title': 'Sergey Smbatyan conducts Tigran Mansurian — With Chouchane Siranossian and Mario Brunello',
            'thumbnail': r're:https://.+/.+\.jpg',
            'description': 'md5:9411fe44c874bb10e9af288c65816e41',
            'upload_date': '20200323',
            'timestamp': 1584975600,
            'display_id': 'sergey-smbatyan-conducts-mansurian-chouchane-siranossian-mario-brunello',
        },
        'expected_warnings': [r'preview'],
    }, {
        'url': 'https://www.medici.tv/en/ballets/carmen-ballet-choregraphie-de-jiri-bubenicek-teatro-dellopera-di-roma',
        'md5': '40f5e76cb701a97a6d7ba23b62c49990',
        'info_dict': {
            'id': '7857',
            'ext': 'mp4',
            'title': 'Carmen by Jiří Bubeníček after Roland Petit, music by Bizet, de Falla, Castelnuovo-Tedesco, and Bonolis',
            'thumbnail': r're:https://.+/.+\.jpg',
            'description': 'md5:0f15a15611ed748020c769873e10a8bb',
            'upload_date': '20240223',
            'timestamp': 1708707600,
            'display_id': 'carmen-ballet-choregraphie-de-jiri-bubenicek-teatro-dellopera-di-roma',
        },
        'expected_warnings': [r'preview'],
    }, {
        'url': 'https://www.medici.tv/en/documentaries/la-sonnambula-liege-2023-documentaire',
        'md5': '87ff198018ce79a34757ab0dd6f21080',
        'info_dict': {
            'id': '7513',
            'ext': 'mp4',
            'title': 'La Sonnambula',
            'thumbnail': r're:https://.+/.+\.jpg',
            'description': 'md5:0caf9109a860fd50cd018df062a67f34',
            'upload_date': '20231103',
            'timestamp': 1699010830,
            'display_id': 'la-sonnambula-liege-2023-documentaire',
        },
        'expected_warnings': [r'preview'],
    }, {
        'url': 'https://edu.medici.tv/en/masterclasses/yvonne-loriod-olivier-messiaen',
        'md5': 'fb5dcec46d76ad20fbdbaabb01da191d',
        'info_dict': {
            'id': '3024',
            'ext': 'mp4',
            'title': 'Olivier Messiaen and Yvonne Loriod, pianists and teachers',
            'thumbnail': r're:https://.+/.+\.jpg',
            'description': 'md5:aab948e2f7690214b5c28896c83f1fc1',
            'upload_date': '20150223',
            'timestamp': 1424706608,
            'display_id': 'yvonne-loriod-olivier-messiaen',
        },
        'skip': 'Requires authentication; preview starts in the middle',
    }, {
        'url': 'https://www.medici.tv/en/jazz/makaya-mccraven-la-rochelle',
        'md5': '4cc279a8b06609782747c8f50beea2b3',
        'info_dict': {
            'id': '7922',
            'ext': 'mp4',
            'title': 'NEW: Makaya McCraven in La Rochelle',
            'thumbnail': r're:https://.+/.+\.jpg',
            'description': 'md5:b5a8aaeb6993d8ccb18bde8abb8aa8d2',
            'upload_date': '20231228',
            'timestamp': 1703754863,
            'display_id': 'makaya-mccraven-la-rochelle',
        },
        'expected_warnings': [r'preview'],
    }]

    def _real_extract(self, url):
        """Fetch the movie-file record for the page slug and build the info dict.

        The page itself is requested first (its response body is unused), then
        the JSON API is queried with browser-like headers. A warning is emitted
        when the API reports that the stream is not the full video because of
        the user's access level. Formats and subtitles come from the HLS
        manifest referenced by the record.
        """
        mobj = self._match_valid_url(url)
        display_id = mobj.group('id')

        # Only the side effect of this request matters; per the progress note
        # it is made so that the CSRF token cookie gets set.
        self._request_webpage(url, display_id, 'Requesting CSRF token cookie')

        # The edu subdomain is served by an 'edu-'-prefixed API path.
        api_prefix = 'edu-' if mobj.group('sub') == 'edu' else ''
        origin = f'https://{urllib.parse.urlparse(url).hostname}'

        # Best-effort: try_call presumably yields None when the auth cookie is
        # absent, and filter_dict presumably then drops the header -- verify
        # against the helpers in ..utils.
        auth_token = try_call(
            lambda: urllib.parse.unquote(self._get_cookies(url)['auth._token.mAuth'].value))
        request_headers = filter_dict({
            'Authorization': auth_token,
            'Device-Type': 'web',
            'Origin': origin,
            'Referer': f'{origin}/',
            'Accept': 'application/json, text/plain, */*',
        })

        data = self._download_json(
            f'https://api.medici.tv/{api_prefix}satie/edito/movie-file/{display_id}/',
            display_id, headers=request_headers)

        is_full_video = traverse_obj(data, ('video', 'is_full_video'))
        is_access_limited = traverse_obj(data, ('video', 'is_limited_by_user_access'))
        if is_access_limited and not is_full_video:
            self.report_warning(
                'The full video is for subscribers only. Only previews will be downloaded. If you '
                'have used the --cookies-from-browser option, try using the --cookies option instead')

        # 'video_url' is treated as mandatory: a missing key raises here.
        manifest_url = data['video']['video_url']
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            manifest_url, display_id, 'mp4')

        info = {
            'id': str(data['id']),
            'display_id': display_id,
            'formats': formats,
            'subtitles': subtitles,
        }
        # Optional metadata; each field is looked up leniently via traverse_obj.
        info.update(traverse_obj(data, {
            'title': ('title', {str}),
            'description': ('subtitle', {str}),
            'thumbnail': ('picture', {url_or_none}),
            'timestamp': ('date_publish', {parse_iso8601}),
        }))
        return info