[ie/youtube] Add age-gate workaround for some embeddable videos (#11821)
[yt-dlp.git] / yt_dlp / extractor / sonyliv.py
blob0cd914cbbaca54635cabffd77a0103f581ea1ae2
1 import datetime as dt
2 import itertools
3 import json
4 import math
5 import random
6 import time
7 import uuid
9 from .common import InfoExtractor
10 from ..networking.exceptions import HTTPError
11 from ..utils import (
12 ExtractorError,
13 int_or_none,
14 jwt_decode_hs256,
15 try_call,
17 from ..utils.traversal import traverse_obj
class SonyLIVIE(InfoExtractor):
    # Extractor for a single SonyLIV video. Accepts the internal
    # "sonyliv:<id>" scheme as well as show/sport/movie/clip/trailer/
    # music-video page URLs that end in "-<numeric id>".
    _VALID_URL = r'''(?x)
                     (?:
                        sonyliv:|
                        https?://(?:www\.)?sonyliv\.com/(?:s(?:how|port)s/[^/]+|movies|clip|trailer|music-videos)/[^/?#&]+-
                    )
                    (?P<id>\d+)
                  '''
    _TESTS = [{
        'url': 'https://www.sonyliv.com/shows/bachelors-delight-1700000113/achaari-cheese-toast-1000022678?watch=true',
        'info_dict': {
            'title': 'Achaari Cheese Toast',
            'id': '1000022678',
            'ext': 'mp4',
            'upload_date': '20200411',
            'description': 'md5:3957fa31d9309bf336ceb3f37ad5b7cb',
            'timestamp': 1586632091,
            'duration': 185,
            'season_number': 1,
            'series': 'Bachelors Delight',
            'episode_number': 1,
            'release_year': 2016,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.sonyliv.com/movies/tahalka-1000050121?watch=true',
        'only_matching': True,
    }, {
        'url': 'https://www.sonyliv.com/clip/jigarbaaz-1000098925',
        'only_matching': True,
    }, {
        'url': 'https://www.sonyliv.com/trailer/sandwiched-forever-1000100286?watch=true',
        'only_matching': True,
    }, {
        'url': 'https://www.sonyliv.com/sports/india-tour-of-australia-2020-21-1700000286/cricket-hls-day-3-1st-test-aus-vs-ind-19-dec-2020-1000100959?watch=true',
        'only_matching': True,
    }, {
        'url': 'https://www.sonyliv.com/music-videos/yeh-un-dinon-ki-baat-hai-1000018779',
        'only_matching': True,
    }]
    _GEO_COUNTRIES = ['IN']
    # Headers sent with every API call. This is a class-level dict that the
    # methods below mutate in place (security token, device id, auth token).
    _HEADERS = {}
    _LOGIN_HINT = 'Use "--username <mobile_number>" to login using OTP or "--username token --password <auth_token>" to login using auth token.'
    _NETRC_MACHINE = 'sonyliv'

    def _get_device_id(self):
        """Return a random device id of the form ``<32 hex chars>-<ms timestamp>``.

        The template is laid out like a dash-less version-4 UUID: each ``x``
        becomes one random hex digit, the literal ``4`` is kept, and ``y``
        becomes one of ``8``, ``9``, ``a`` or ``b``.
        """
        seed = int(time.time() * 1000)
        chars = list('xxxxxxxxxxxx4xxxyxxxxxxxxxxxxxxx')
        for i, c in enumerate(chars):
            nibble = int((seed + 16 * random.random()) % 16)
            seed //= 16
            if c == 'x':
                # Format as hex: a decimal str() would emit two characters
                # for 10..15 and break the fixed 32-character layout
                chars[i] = f'{nibble:x}'
            elif c == 'y':
                chars[i] = f'{3 & nibble | 8:x}'
        return ''.join(chars) + '-' + str(int(time.time() * 1000))

    def _api_timestamp(self):
        """Return the current time formatted the way the OTP endpoints are sent it."""
        # NOTE(review): '%M' after the dot repeats the minutes where a
        # fraction of a second would be expected, and the value is local time
        # despite the 'Z' suffix. Kept as-is since the server's expectations
        # cannot be confirmed from here - TODO verify.
        return dt.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%MZ')

    def _perform_login(self, username, password):
        """Authenticate and store the resulting token in ``_HEADERS['authorization']``.

        Two modes are supported:
          * ``username == 'token'`` (case-insensitive) with a password that
            decodes as a JWT: the password is used directly as the auth token.
          * a 10-digit mobile number: an OTP is requested, the user is
            prompted for it, and it is exchanged for an access token.

        Raises ExtractorError for any other username, or when either OTP
        endpoint answers with resultCode 'KO'.
        """
        self._HEADERS['device_id'] = self._get_device_id()
        self._HEADERS['content-type'] = 'application/json'

        if username.lower() == 'token' and try_call(lambda: jwt_decode_hs256(password)):
            self._HEADERS['authorization'] = password
            self.report_login()
            return
        elif len(username) != 10 or not username.isdigit():
            raise ExtractorError(f'Invalid username/password; {self._LOGIN_HINT}')

        self.report_login()
        otp_request_json = self._download_json(
            'https://apiv2.sonyliv.com/AGL/1.6/A/ENG/WEB/IN/HR/CREATEOTP-V2',
            None, note='Sending OTP', headers=self._HEADERS, data=json.dumps({
                'mobileNumber': username,
                'channelPartnerID': 'MSMIND',
                'country': 'IN',
                'timestamp': self._api_timestamp(),
                'otpSize': 6,
                'loginType': 'REGISTERORSIGNIN',
                'isMobileMandatory': True,
            }).encode())
        if otp_request_json['resultCode'] == 'KO':
            raise ExtractorError(otp_request_json['message'], expected=True)

        otp_verify_json = self._download_json(
            'https://apiv2.sonyliv.com/AGL/2.0/A/ENG/WEB/IN/HR/CONFIRMOTP-V2',
            None, note='Verifying OTP', headers=self._HEADERS, data=json.dumps({
                'channelPartnerID': 'MSMIND',
                'mobileNumber': username,
                'country': 'IN',
                'otp': self._get_tfa_info('OTP'),
                'dmaId': 'IN',
                'ageConfirmation': True,
                'timestamp': self._api_timestamp(),
                'isMobileMandatory': True,
            }).encode())
        if otp_verify_json['resultCode'] == 'KO':
            # Report the verification step's own message, not the one from
            # the (successful) OTP request above
            raise ExtractorError(otp_verify_json['message'], expected=True)
        self._HEADERS['authorization'] = otp_verify_json['resultObj']['accessToken']

    def _call_api(self, version, path, video_id):
        """GET ``/AGL/<version>/A/ENG/WEB/<path>`` and return its ``resultObj``.

        HTTP 406 with the "Please subscribe" message is turned into a
        login-required error; HTTP 403 is turned into a geo-restriction error
        for 'Geoblocked Country' or an ExtractorError carrying the server
        message otherwise. Any other failure is re-raised unchanged.
        """
        try:
            return self._download_json(
                f'https://apiv2.sonyliv.com/AGL/{version}/A/ENG/WEB/{path}',
                video_id, headers=self._HEADERS)['resultObj']
        except ExtractorError as e:
            if not isinstance(e.cause, HTTPError):
                raise
            if e.cause.status == 406 and self._parse_json(
                    e.cause.response.read().decode(), video_id)['message'] == 'Please subscribe to watch this content':
                self.raise_login_required(self._LOGIN_HINT, method=None)
            if e.cause.status == 403:
                message = self._parse_json(
                    e.cause.response.read().decode(), video_id)['message']
                if message == 'Geoblocked Country':
                    self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
                raise ExtractorError(message)
            raise

    def _initialize_pre_login(self):
        """Fetch the security token and add it to the shared API headers."""
        self._HEADERS['security_token'] = self._call_api('1.4', 'ALL/GETTOKEN', None)

    def _real_extract(self, url):
        """Build the info dict (DASH + HLS formats, subtitles, metadata) for one video."""
        video_id = self._match_id(url)
        content = self._call_api(
            '1.5', 'IN/CONTENT/VIDEOURL/VOD/' + video_id, video_id)
        if not self.get_param('allow_unplayable_formats') and content.get('isEncrypted'):
            self.report_drm(video_id)
        dash_url = content['videoURL']
        headers = {
            'x-playback-session-id': f'{uuid.uuid4().hex}-{int(time.time() * 1000)}',
        }
        formats = self._extract_mpd_formats(
            dash_url, video_id, mpd_id='dash', headers=headers, fatal=False)
        # The HLS manifest URL is derived from the DASH one by string substitution
        formats.extend(self._extract_m3u8_formats(
            dash_url.replace('.mpd', '.m3u8').replace('/DASH/', '/HLS/'),
            video_id, 'mp4', m3u8_id='hls', headers=headers, fatal=False))
        # Attach the playback session header to every format as well
        for f in formats:
            f.setdefault('http_headers', {}).update(headers)

        metadata = self._call_api(
            '1.6', 'IN/DETAIL/' + video_id, video_id)['containers'][0]['metadata']
        title = metadata['episodeTitle']

        subtitles = {}
        # `or []` also covers a 'subtitle' key that is present but null
        for sub in content.get('subtitle') or []:
            sub_url = sub.get('subtitleUrl')
            if not sub_url:
                continue
            subtitles.setdefault(sub.get('subtitleLanguageName', 'ENG'), []).append({
                'url': sub_url,
            })

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': content.get('posterURL'),
            'description': metadata.get('longDescription') or metadata.get('shortDescription'),
            # presumably milliseconds scaled down to seconds - confirm against int_or_none
            'timestamp': int_or_none(metadata.get('creationDate'), 1000),
            'duration': int_or_none(metadata.get('duration')),
            'season_number': int_or_none(metadata.get('season')),
            'series': metadata.get('title'),
            'episode_number': int_or_none(metadata.get('episodeNumber')),
            'release_year': int_or_none(metadata.get('year')),
            'subtitles': subtitles,
        }
class SonyLIVSeriesIE(InfoExtractor):
    # Playlist extractor for a SonyLIV show page; the show id is the
    # trailing 10-digit number of the URL path.
    _VALID_URL = r'https?://(?:www\.)?sonyliv\.com/shows/[^/?#&]+-(?P<id>\d{10})/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://www.sonyliv.com/shows/adaalat-1700000091',
        'playlist_mincount': 452,
        'info_dict': {
            'id': '1700000091',
        },
    }, {
        'url': 'https://www.sonyliv.com/shows/beyhadh-1700000007/',
        'playlist_mincount': 358,
        'info_dict': {
            'id': '1700000007',
        },
    }]
    _API_BASE = 'https://apiv2.sonyliv.com/AGL'
    # Accepted values of the "sort_order" extractor argument; the first is the default
    _SORT_ORDERS = ('asc', 'desc')

    def _entries(self, show_id, sort_order):
        """Lazily yield one ``sonyliv:<id>`` url_result per episode of the show.

        Seasons are walked in API order (reversed when ``sort_order`` is
        'desc'); each season's episodes are requested in pages of 100 until
        a page yields no episode with an integer-like id.
        """
        # Both the series and the season responses keep their children under
        # the same path; only entries whose id parses as an int are kept
        children_path = (
            'resultObj', 'containers', 0, 'containers',
            lambda _, v: int_or_none(v['id']))

        headers = {
            'Accept': 'application/json, text/plain, */*',
            'Referer': 'https://www.sonyliv.com',
        }
        token_response = self._download_json(
            f'{self._API_BASE}/1.4/A/ENG/WEB/ALL/GETTOKEN', show_id,
            'Downloading security token', headers=headers)
        headers['security_token'] = token_response['resultObj']

        series_response = self._download_json(
            f'{self._API_BASE}/1.9/R/ENG/WEB/IN/DL/DETAIL/{show_id}', show_id,
            'Downloading series JSON', headers=headers, query={
                'kids_safe': 'false',
                'from': '0',
                'to': '49',
            })
        seasons = traverse_obj(series_response, children_path)
        if sort_order == 'desc':
            seasons = reversed(seasons)

        for season in seasons:
            season_id = str(season['id'])
            label = traverse_obj(season, ('metadata', 'title', {str})) or 'season'
            for page_num in itertools.count(1):
                # Page N covers the inclusive range [100 * (N - 1), 100 * N - 1]
                first = 100 * (page_num - 1)
                page_response = self._download_json(
                    f'{self._API_BASE}/1.4/R/ENG/WEB/IN/CONTENT/DETAIL/BUNDLE/{season_id}',
                    season_id, f'Downloading {label} page {page_num} JSON', headers=headers, query={
                        'from': str(first),
                        'to': str(first + 99),
                        'orderBy': 'episodeNumber',
                        'sortOrder': sort_order,
                    })
                episodes = traverse_obj(page_response, children_path)
                if not episodes:
                    break
                for episode in episodes:
                    video_id = str(episode['id'])
                    yield self.url_result(f'sonyliv:{video_id}', SonyLIVIE, video_id)

    def _real_extract(self, url):
        """Return a playlist of all episodes of the show at ``url``.

        Raises ValueError when the ``sort_order`` extractor argument is not
        one of ``_SORT_ORDERS``.
        """
        show_id = self._match_id(url)

        default_order = self._SORT_ORDERS[0]
        sort_order = self._configuration_arg('sort_order', [default_order])[0]
        if sort_order in self._SORT_ORDERS:
            return self.playlist_result(self._entries(show_id, sort_order), playlist_id=show_id)

        allowed = ', '.join(self._SORT_ORDERS)
        raise ValueError(
            f'Invalid sort order "{sort_order}". Allowed values are: {allowed}')