[cleanup] Misc (#8968)
[yt-dlp.git] / yt_dlp / extractor / tennistv.py
blobc1b4a33124aea0540a3dd149e070fce52f59d5e1
1 import urllib.parse
3 from .common import InfoExtractor
4 from ..utils import (
5 ExtractorError,
6 random_uuidv4,
7 unified_timestamp,
8 urlencode_postdata,
class TennisTVIE(InfoExtractor):
    """Extractor for video pages on tennistv.com.

    Every request for stream data needs an access token.  The tokens are
    obtained either by logging in with a username and password
    (``_perform_login``) or from the ``access_token`` / ``refresh_token``
    cookies of https://www.tennistv.com/ (``_real_initialize``).
    """

    _VALID_URL = r'https?://(?:www\.)?tennistv\.com/videos/(?P<id>[-a-z0-9]+)'
    _TESTS = [{
        'url': 'https://www.tennistv.com/videos/indian-wells-2018-verdasco-fritz',
        'info_dict': {
            'id': 'indian-wells-2018-verdasco-fritz',
            'ext': 'mp4',
            'title': 'Fernando Verdasco v Taylor Fritz',
            'description': 're:^After his stunning victory.{174}$',
            'thumbnail': 'https://atp-prod.akamaized.net/api/images/v1/images/112831/landscape/1242/0',
            'timestamp': 1521017381,
            'upload_date': '20180314',
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Requires email and password of a subscribed account',
    }, {
        'url': 'https://www.tennistv.com/videos/2650480/best-matches-of-2022-part-5',
        'info_dict': {
            'id': '2650480',
            'ext': 'mp4',
            'title': 'Best Matches of 2022 - Part 5',
            'description': 'md5:36dec3bfae7ed74bd79e48045b17264c',
            'thumbnail': 'https://open.http.mp.streamamg.com/p/3001482/sp/300148200/thumbnail/entry_id/0_myef18pd/version/100001/height/1920',
        },
        'params': {'skip_download': 'm3u8'},
        'skip': 'Requires email and password of a subscribed account',
    }]
    _NETRC_MACHINE = 'tennistv'

    # Tokens are stored on the instance by get_token() / _real_initialize();
    # the class-level None values only mean "not authenticated yet".
    access_token, refresh_token = None, None
    # Partner id interpolated into the manifest and thumbnail URLs below.
    _PARTNER_ID = 3001482
    _FORMAT_URL = 'https://open.http.mp.streamamg.com/p/{partner}/sp/{partner}00/playManifest/entryId/{entry}/format/applehttp/protocol/https/a.m3u8?ks={session}'
    _AUTH_BASE_URL = 'https://sso.tennistv.com/auth/realms/TennisTV/protocol/openid-connect'
    # Headers sent with every auth and session request made by this class.
    _HEADERS = {
        'origin': 'https://www.tennistv.com',
        'referer': 'https://www.tennistv.com/',
        'content-Type': 'application/x-www-form-urlencoded',
    }

    @staticmethod
    def _extract_auth_code(redirect_url):
        """Return the last ``code`` parameter found in *redirect_url*, or None.

        The auth request asks for ``response_mode=fragment``, so the fragment
        is inspected first; the query string is checked as a fallback.
        """
        parts = urllib.parse.urlparse(redirect_url)
        for component in (parts.fragment, parts.query):
            codes = urllib.parse.parse_qs(component).get('code')
            if codes:
                return codes[-1]
        return None

    def _perform_login(self, username, password):
        """Log in with *username* / *password* and store the resulting tokens.

        Raises ExtractorError when the credentials are rejected or when no
        authorization code is present in the final redirect URL.
        """
        login_page = self._download_webpage(
            f'{self._AUTH_BASE_URL}/auth', None, 'Downloading login page',
            query={
                'client_id': 'tennis-tv-web',
                'redirect_uri': 'https://tennistv.com',
                'response_mode': 'fragment',
                'response_type': 'code',
                'scope': 'openid',
            })

        # The login form's action attribute is the URL the credentials go to.
        post_url = self._html_search_regex(r'action=["\']([^"\']+?)["\']\s+method=["\']post["\']', login_page, 'login POST url')
        temp_page = self._download_webpage(
            post_url, None, 'Sending login data', 'Unable to send login data',
            headers=self._HEADERS, data=urlencode_postdata({
                'username': username,
                'password': password,
                'submitAction': 'Log In',
            }))
        if 'Your username or password was incorrect' in temp_page:
            raise ExtractorError('Your username or password was incorrect', expected=True)

        # The same redirect URI has to be sent again when exchanging the code.
        redirect_uri = 'https://www.tennistv.com/resources/v1.1.10/html/silent-check-sso.html'
        handle = self._request_webpage(
            f'{self._AUTH_BASE_URL}/auth', None, 'Logging in', headers=self._HEADERS,
            query={
                'client_id': 'tennis-tv-web',
                'redirect_uri': redirect_uri,
                'state': random_uuidv4(),
                'response_mode': 'fragment',
                'response_type': 'code',
                'scope': 'openid',
                'nonce': random_uuidv4(),
                'prompt': 'none',
            })

        # Parse the final URL component-wise instead of treating the whole URL
        # as a query string, and fail with a clear message rather than a
        # KeyError when the server did not hand back a code.
        code = self._extract_auth_code(handle.url)
        if not code:
            raise ExtractorError('Unable to obtain an authorization code after logging in')

        self.get_token(None, {
            'code': code,
            'grant_type': 'authorization_code',
            'client_id': 'tennis-tv-web',
            'redirect_uri': redirect_uri,
        })

    def get_token(self, video_id, payload):
        """POST *payload* to the token endpoint and remember the tokens.

        A token missing from the response leaves the previously stored value
        untouched.
        """
        res = self._download_json(
            f'{self._AUTH_BASE_URL}/token', video_id, 'Fetching tokens',
            'Unable to fetch tokens', headers=self._HEADERS, data=urlencode_postdata(payload))

        self.access_token = res.get('access_token') or self.access_token
        self.refresh_token = res.get('refresh_token') or self.refresh_token

    def _real_initialize(self):
        """Make sure both tokens are available, falling back to site cookies."""
        if self.access_token and self.refresh_token:
            return

        cookies = self._get_cookies('https://www.tennistv.com/')
        if not cookies.get('access_token') or not cookies.get('refresh_token'):
            # NOTE(review): raise_login_required() is expected not to return;
            # the lookups below would fail on the missing cookies otherwise.
            self.raise_login_required()
        self.access_token, self.refresh_token = cookies['access_token'].value, cookies['refresh_token'].value

    def _download_session_json(self, video_id, entryid):
        """Fetch the ksession JSON for *entryid* using the current access token."""
        return self._download_json(
            'https://atppayments.streamamg.com/api/v1/session/ksession/',
            video_id, 'Downloading ksession token', 'Failed to download ksession token',
            headers=self._HEADERS, query={
                'lang': 'en',
                'apijwttoken': self.access_token,
                'entryId': entryid,
            })

    def _real_extract(self, url):
        """Return the info dict (metadata, formats, subtitles) for *url*."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        entryid = self._search_regex(r'data-entry-id=["\']([^"\']+)', webpage, 'entryID')
        session_json = self._download_session_json(video_id, entryid)

        k_session = session_json.get('KSession')
        if k_session is None:
            # No session key: refresh the tokens and try exactly once more.
            self.get_token(video_id, {
                'grant_type': 'refresh_token',
                'refresh_token': self.refresh_token,
                'client_id': 'tennis-tv-web',
            })
            # Keep the fresh response so the ErrorMessage check below reflects
            # the request that actually produced the session key.
            session_json = self._download_session_json(video_id, entryid)
            k_session = session_json.get('KSession')
            if k_session is None:
                raise ExtractorError('Failed to get KSession, possibly a premium video', expected=True)

        if session_json.get('ErrorMessage'):
            self.report_warning(session_json['ErrorMessage'])

        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            self._FORMAT_URL.format(partner=self._PARTNER_ID, entry=entryid, session=k_session), video_id)

        return {
            'id': video_id,
            'title': self._generic_title('', webpage),
            'description': self._html_search_regex(
                (r'<span itemprop="description" content=["\']([^"\']+)["\']>', *self._og_regexes('description')),
                webpage, 'description', fatal=False),
            'thumbnail': f'https://open.http.mp.streamamg.com/p/{self._PARTNER_ID}/sp/{self._PARTNER_ID}00/thumbnail/entry_id/{entryid}/version/100001/height/1920',
            'timestamp': unified_timestamp(self._html_search_regex(
                r'<span itemprop="uploadDate" content=["\']([^"\']+)["\']>', webpage, 'upload time', fatal=False)),
            # Empty attribute values are normalised to None.
            'series': self._html_search_regex(r'data-series\s*?=\s*?"(.*?)"', webpage, 'series', fatal=False) or None,
            'season': self._html_search_regex(r'data-tournament-city\s*?=\s*?"(.*?)"', webpage, 'season', fatal=False) or None,
            'episode': self._html_search_regex(r'data-round\s*?=\s*?"(.*?)"', webpage, 'round', fatal=False) or None,
            'formats': formats,
            'subtitles': subtitles,
        }