[ie/cwtv:movie] Add extractor (#12227)
[yt-dlp.git] / yt_dlp / extractor / eurosport.py
blob682546f8f4cb5bd81e63ee9df1ca929a34e8c8b9
1 from .common import InfoExtractor
2 from ..utils import traverse_obj
5 class EurosportIE(InfoExtractor):
6 _VALID_URL = r'''(?x)
7 https?://(?:
8 (?:(?:www|espanol)\.)?eurosport\.(?:com(?:\.tr)?|de|dk|es|fr|hu|it|nl|no|ro)|
9 eurosport\.tvn24\.pl
10 )/[\w-]+/(?:[\w-]+/[\d-]+/)?[\w.-]+_(?P<id>vid\d+)
11 '''
12 _TESTS = [{
13 'url': 'https://www.eurosport.com/tennis/roland-garros/2022/highlights-rafael-nadal-brushes-aside-caper-ruud-to-win-record-extending-14th-french-open-title_vid1694147/video.shtml',
14 'info_dict': {
15 'id': '2480939',
16 'ext': 'mp4',
17 'title': 'Highlights: Rafael Nadal brushes aside Caper Ruud to win record-extending 14th French Open title',
18 'description': 'md5:b564db73ecfe4b14ebbd8e62a3692c76',
19 'thumbnail': 'https://imgresizer.eurosport.com/unsafe/1280x960/smart/filters:format(jpeg)/origin-imgresizer.eurosport.com/2022/06/05/3388285-69245968-2560-1440.png',
20 'duration': 195.0,
21 'display_id': 'vid1694147',
22 'timestamp': 1654446698,
23 'upload_date': '20220605',
25 }, {
26 'url': 'https://www.eurosport.com/tennis/roland-garros/2022/watch-the-top-five-shots-from-men-s-final-as-rafael-nadal-beats-casper-ruud-to-seal-14th-french-open_vid1694283/video.shtml',
27 'info_dict': {
28 'id': '2481254',
29 'ext': 'mp4',
30 'title': 'md5:149dcc5dfb38ab7352acc008cc9fb071',
31 'duration': 130.0,
32 'thumbnail': 'https://imgresizer.eurosport.com/unsafe/1280x960/smart/filters:format(jpeg)/origin-imgresizer.eurosport.com/2022/06/05/3388422-69248708-2560-1440.png',
33 'description': 'md5:a0c8a7f6b285e48ae8ddbe7aa85cfee6',
34 'display_id': 'vid1694283',
35 'timestamp': 1654456090,
36 'upload_date': '20220605',
38 }, {
39 # geo-fenced, but can be bypassed by xff
40 'url': 'https://www.eurosport.com/cycling/tour-de-france-femmes/2022/incredible-ride-marlen-reusser-storms-to-stage-4-win-at-tour-de-france-femmes_vid1722221/video.shtml',
41 'info_dict': {
42 'id': '2582552',
43 'ext': 'mp4',
44 'title': '‘Incredible ride!’ - Marlen Reusser storms to Stage 4 win at Tour de France Femmes',
45 'duration': 188.0,
46 'display_id': 'vid1722221',
47 'timestamp': 1658936167,
48 'thumbnail': 'https://imgresizer.eurosport.com/unsafe/1280x960/smart/filters:format(jpeg)/origin-imgresizer.eurosport.com/2022/07/27/3423347-69852108-2560-1440.jpg',
49 'description': 'md5:32bbe3a773ac132c57fb1e8cca4b7c71',
50 'upload_date': '20220727',
52 }, {
53 'url': 'https://www.eurosport.com/football/champions-league/2022-2023/pep-guardiola-emotionally-destroyed-after-manchester-city-win-over-bayern-munich-in-champions-league_vid1896254/video.shtml',
54 'info_dict': {
55 'id': '3096477',
56 'ext': 'mp4',
57 'title': 'md5:82edc17370124c7a19b3cf518517583b',
58 'duration': 84.0,
59 'description': 'md5:b3f44ef7f5b5b95b24a273b163083feb',
60 'thumbnail': 'https://imgresizer.eurosport.com/unsafe/1280x960/smart/filters:format(jpeg)/origin-imgresizer.eurosport.com/2023/04/12/3682873-74947393-2560-1440.jpg',
61 'timestamp': 1681292028,
62 'upload_date': '20230412',
63 'display_id': 'vid1896254',
65 }, {
66 'url': 'https://www.eurosport.com/football/last-year-s-semi-final-pain-was-still-there-pep-guardiola-after-man-city-reach-cl-final_vid1914115/video.shtml',
67 'info_dict': {
68 'id': '3149108',
69 'ext': 'mp4',
70 'title': '\'Last year\'s semi-final pain was still there\' - Pep Guardiola after Man City reach CL final',
71 'description': 'md5:89ef142fe0170a66abab77fac2955d8e',
72 'display_id': 'vid1914115',
73 'timestamp': 1684403618,
74 'thumbnail': 'https://imgresizer.eurosport.com/unsafe/1280x960/smart/filters:format(jpeg)/origin-imgresizer.eurosport.com/2023/05/18/3707254-75435008-2560-1440.jpg',
75 'duration': 105.0,
76 'upload_date': '20230518',
78 }, {
79 'url': 'https://www.eurosport.de/radsport/vuelta-a-espana/2024/vuelta-a-espana-2024-wout-van-aert-und-co.-verzweifeln-an-mcnulty-zeitfahr-krimi-in-lissabon_vid2219478/video.shtml',
80 'only_matching': True,
81 }, {
82 'url': 'https://www.eurosport.dk/speedway/mikkel-michelsen-misser-finalen-i-cardiff-se-danskeren-i-semifinalen-her_vid2219363/video.shtml',
83 'only_matching': True,
84 }, {
85 'url': 'https://www.eurosport.nl/mixed-martial-arts/ufc/2022/ufc-305-respect-tussen-adesanya-en-du-plessis_vid2219650/video.shtml',
86 'only_matching': True,
87 }, {
88 'url': 'https://www.eurosport.es/ciclismo/la-vuelta-2024-carlos-rodriguez-olvida-la-crono-y-ya-espera-que-llegue-la-montana-no-me-encontre-nada-comodo_vid2219682/video.shtml',
89 'only_matching': True,
90 }, {
91 'url': 'https://www.eurosport.fr/football/supercoupe-d-europe/2024-2025/kylian-mbappe-vinicius-junior-eduardo-camavinga-touche.-extraits-de-l-entrainement-du-real-madrid-en-video_vid2216993/video.shtml',
92 'only_matching': True,
93 }, {
94 'url': 'https://www.eurosport.it/calcio/serie-a/2024-2025/samardzic-a-bergamo-per-le-visite-mediche-con-l-atalanta_vid2219680/video.shtml',
95 'only_matching': True,
96 }, {
97 'url': 'https://www.eurosport.hu/kerekpar/vuelta-a-espana/2024/dramai-harc-a-masodpercekert-meglepetesgyoztes-a-vuelta-nyitoszakaszan_vid2219481/video.shtml',
98 'only_matching': True,
99 }, {
100 'url': 'https://www.eurosport.no/golf/fedex-st-jude-championship/2024/ligger-pa-andreplass-sa-skjer-dette-drama_vid30000618/video.shtml',
101 'only_matching': True,
102 }, {
103 'url': 'https://www.eurosport.no/golf/fedex-st-jude-championship/2024/ligger-pa-andreplass-sa-skjer-dette-drama_vid2219531/video.shtml',
104 'only_matching': True,
105 }, {
106 'url': 'https://www.eurosport.ro/tenis/western-southern-open-2/2024/rezumatul-partidei-dintre-zverev-si-shelton-de-la-cincinnati_vid2219657/video.shtml',
107 'only_matching': True,
108 }, {
109 'url': 'https://www.eurosport.com.tr/hentbol/olympic-games-paris-2024/2024/paris-2024-denmark-ile-germany-olimpiyatlarin-onemli-anlari_vid2215836/video.shtml',
110 'only_matching': True,
111 }, {
112 'url': 'https://eurosport.tvn24.pl/kolarstwo/tour-de-france-kobiet/2024/kasia-niewiadoma-przed-ostatnim-8.-etapem-tour-de-france-kobiet_vid2219765/video.shtml',
113 'only_matching': True,
116 _TOKEN = None
118 # actually defined in https://netsport.eurosport.io/?variables={"databaseId":<databaseId>,"playoutType":"VDP"}&extensions={"persistedQuery":{"version":1 ..
119 # but that method requires getting the sha256 hash
120 _GEO_COUNTRIES = ['DE', 'NL', 'EU', 'IT', 'FR'] # Not complete list but it should work
121 _GEO_BYPASS = False
def _real_initialize(self):
    """Fetch the API token once and cache it on the class.

    The token is stored in ``EurosportIE._TOKEN``; while that attribute is
    not ``None`` the download is skipped, so the request is made at most
    once per successful fetch regardless of how many instances initialize.
    """
    if EurosportIE._TOKEN is not None:
        # A previous call already stored a token; nothing to do.
        return

    token_response = self._download_json(
        'https://eu3-prod-direct.eurosport.com/token?realm=eurosport', None,
        'Trying to get token')
    # Only assign after the nested lookup succeeds, so a malformed response
    # leaves the cache unset.
    EurosportIE._TOKEN = token_response['data']['attributes']['token']
129 def _real_extract(self, url):
130 display_id = self._match_id(url)
131 webpage = self._download_webpage(url, display_id)
133 json_data = self._download_json(
134 f'https://eu3-prod-direct.eurosport.com/playback/v2/videoPlaybackInfo/sourceSystemId/eurosport-{display_id}',
135 display_id, query={'usePreAuth': True}, headers={'Authorization': f'Bearer {EurosportIE._TOKEN}'})['data']
137 json_ld_data = self._search_json_ld(webpage, display_id)
139 formats, subtitles = [], {}
140 for stream_type in json_data['attributes']['streaming']:
141 if stream_type == 'hls':
142 fmts, subs = self._extract_m3u8_formats_and_subtitles(
143 traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, ext='mp4', fatal=False)
144 elif stream_type == 'dash':
145 fmts, subs = self._extract_mpd_formats_and_subtitles(
146 traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, fatal=False)
147 elif stream_type == 'mss':
148 fmts, subs = self._extract_ism_formats_and_subtitles(
149 traverse_obj(json_data, ('attributes', 'streaming', stream_type, 'url')), display_id, fatal=False)
151 formats.extend(fmts)
152 self._merge_subtitles(subs, target=subtitles)
154 return {
155 'id': json_data['id'],
156 'title': json_ld_data.get('title') or self._og_search_title(webpage),
157 'display_id': display_id,
158 'formats': formats,
159 'subtitles': subtitles,
160 'thumbnails': json_ld_data.get('thumbnails'),
161 'description': (json_ld_data.get('description')
162 or self._html_search_meta(['og:description', 'description'], webpage)),
163 'duration': json_ld_data.get('duration'),
164 'timestamp': json_ld_data.get('timestamp'),