[ie/youtube] Add age-gate workaround for some embeddable videos (#11821)
[yt-dlp.git] / yt_dlp / extractor / jiocinema.py
blob94c85064ef375e87049d54e0cd9653f4c851f189
1 import base64
2 import itertools
3 import json
4 import random
5 import re
6 import string
7 import time
9 from .common import InfoExtractor
10 from ..utils import (
11 ExtractorError,
12 float_or_none,
13 int_or_none,
14 jwt_decode_hs256,
15 parse_age_limit,
16 try_call,
17 url_or_none,
19 from ..utils.traversal import traverse_obj
class JioCinemaBaseIE(InfoExtractor):
    """Base extractor holding the JioCinema login flow and API helpers.

    Session state (tokens, device ID, user ID) is stored on
    ``JioCinemaBaseIE`` itself rather than on instances, so that every
    extractor derived from it reads the same values.
    """
    _NETRC_MACHINE = 'jiocinema'
    _GEO_BYPASS = False
    # Shared session state. Always assign via ``JioCinemaBaseIE.<name>``:
    # assigning through ``self`` would create an instance attribute that
    # shadows (and silently diverges from) the class-level value
    _ACCESS_TOKEN = None
    _REFRESH_TOKEN = None
    _GUEST_TOKEN = None
    _USER_ID = None
    _DEVICE_ID = None
    _API_HEADERS = {'Origin': 'https://www.jiocinema.com', 'Referer': 'https://www.jiocinema.com/'}
    _APP_NAME = {'appName': 'RJIL_JioCinema'}
    _APP_VERSION = {'appVersion': '5.0.0'}
    _API_SIGNATURES = 'o668nxgzwff'
    _METADATA_API_BASE = 'https://content-jiovoot.voot.com/psapi'
    _ACCESS_HINT = 'the `accessToken` from your browser local storage'
    _LOGIN_HINT = (
        'Log in with "-u phone -p <PHONE_NUMBER>" to authenticate with OTP, '
        f'or use "-u token -p <ACCESS_TOKEN>" to log in with {_ACCESS_HINT}. '
        'If you have previously logged in with yt-dlp and your session '
        'has been cached, you can use "-u device -p <DEVICE_ID>"')

    def _cache_token(self, token_type):
        """Store the access and/or refresh token in the cache, keyed by device ID.

        @param token_type  One of 'access', 'refresh' or 'all'
        """
        assert token_type in ('access', 'refresh', 'all')
        if token_type in ('access', 'all'):
            self.cache.store(
                JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-access', JioCinemaBaseIE._ACCESS_TOKEN)
        if token_type in ('refresh', 'all'):
            self.cache.store(
                JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-refresh', JioCinemaBaseIE._REFRESH_TOKEN)

    def _call_api(self, url, video_id, note='Downloading API JSON', headers=None, data=None):
        """Send `data` as a compact JSON body to `url` and return the decoded JSON.

        `headers` are merged on top of the JSON content headers and
        `_API_HEADERS`. Both `headers` and `data` default to an empty dict.
        Statuses 400, 403 and 474 are passed as `expected_status`; callers
        inspect the `code` field of the returned JSON themselves.
        """
        payload = {} if data is None else data
        return self._download_json(
            url, video_id, note, data=json.dumps(payload, separators=(',', ':')).encode(), headers={
                'Content-Type': 'application/json',
                'Accept': 'application/json',
                **self._API_HEADERS,
                **({} if headers is None else headers),
            }, expected_status=(400, 403, 474))

    def _call_auth_api(self, service, endpoint, note, headers=None, data=None):
        """Call `endpoint` of the given auth `service` (e.g. 'token', 'user')."""
        return self._call_api(
            f'https://auth-jiocinema.voot.com/{service}service/apis/v4/{endpoint}',
            None, note=note, headers=headers, data=data)

    def _refresh_token(self):
        """Exchange the refresh token for a new access token and cache the result.

        Raises ExtractorError (expected) if no refresh token or device ID is known.
        """
        if not JioCinemaBaseIE._REFRESH_TOKEN or not JioCinemaBaseIE._DEVICE_ID:
            raise ExtractorError('User token has expired', expected=True)
        response = self._call_auth_api(
            'token', 'refreshtoken', 'Refreshing token',
            headers={'accesstoken': self._ACCESS_TOKEN}, data={
                **self._APP_NAME,
                'deviceId': self._DEVICE_ID,
                'refreshToken': self._REFRESH_TOKEN,
                **self._APP_VERSION,
            })
        # The response may carry a rotated refresh token; only re-cache if it changed
        refresh_token = response.get('refreshTokenId')
        if refresh_token and refresh_token != JioCinemaBaseIE._REFRESH_TOKEN:
            JioCinemaBaseIE._REFRESH_TOKEN = refresh_token
            self._cache_token('refresh')
        JioCinemaBaseIE._ACCESS_TOKEN = response['authToken']
        self._cache_token('access')

    def _fetch_guest_token(self):
        """Generate a random 10-digit device ID and request a guest token for it."""
        JioCinemaBaseIE._DEVICE_ID = ''.join(random.choices(string.digits, k=10))
        guest_token = self._call_auth_api(
            'token', 'guest', 'Downloading guest token', data={
                **self._APP_NAME,
                'deviceType': 'phone',
                'os': 'ios',
                'deviceId': self._DEVICE_ID,
                'freshLaunch': False,
                'adId': self._DEVICE_ID,
                **self._APP_VERSION,
            })
        # Store on the class like the rest of the session state. Assigning via
        # `self` would leave a guest `_USER_ID` on this instance that shadows
        # the real user ID set on the class by a subsequent OTP login
        JioCinemaBaseIE._GUEST_TOKEN = guest_token['authToken']
        JioCinemaBaseIE._USER_ID = guest_token['userId']

    def _call_login_api(self, endpoint, guest_token, data, note):
        """Call the `loginotp/<endpoint>` user API, authenticated with the guest token.

        @param guest_token  The decoded guest token payload (a dict)
        """
        # NOTE(review): 'data' and the dict are passed to traverse_obj as two
        # separate paths; confirm this is intended rather than a single
        # ('data', {...}) path
        return self._call_auth_api(
            'user', f'loginotp/{endpoint}', note, headers={
                **self.geo_verification_headers(),
                'accesstoken': self._GUEST_TOKEN,
                **self._APP_NAME,
                **traverse_obj(guest_token, 'data', {
                    'deviceType': ('deviceType', {str}),
                    'os': ('os', {str}),
                })}, data=data)

    def _is_token_expired(self, token):
        """Return True if the `exp` claim of `token` is missing or at least 180s in the past.

        A token whose `exp` cannot be read is treated as having `exp == 0`.
        """
        # NOTE(review): as written, a token still counts as valid for 180s
        # after `exp`; confirm whether refreshing 180s *before* expiry
        # (i.e. `time.time() + 180`) was intended
        return (try_call(lambda: jwt_decode_hs256(token)['exp']) or 0) <= int(time.time() - 180)

    def _perform_login(self, username, password):
        """Log in using one of three methods selected by `username` (case-insensitive).

        'token':  `password` is an access token; an optional refresh token UUID
                  can be supplied with the `refresh_token` extractor argument
        'device': `password` is a device ID whose tokens were cached earlier
        'phone':  `password` is a phone number; a one-time password is requested

        Raises ExtractorError (expected) with `_LOGIN_HINT` for anything else.
        """
        if self._ACCESS_TOKEN and not self._is_token_expired(self._ACCESS_TOKEN):
            return

        UUID_RE = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
        # Normalise once so the method checks here and the caching decision
        # further down agree for inputs such as "-u Device"
        login_method = username.lower()

        if login_method == 'token':
            if try_call(lambda: jwt_decode_hs256(password)):
                JioCinemaBaseIE._ACCESS_TOKEN = password
                refresh_hint = 'the `refreshToken` UUID from your browser local storage'
                refresh_token = self._configuration_arg('refresh_token', [''], ie_key=JioCinemaIE)[0]
                if not refresh_token:
                    self.to_screen(
                        'To extend the life of your login session, in addition to your access token, '
                        'you can pass --extractor-args "jiocinema:refresh_token=REFRESH_TOKEN" '
                        f'where REFRESH_TOKEN is {refresh_hint}')
                elif re.fullmatch(UUID_RE, refresh_token):
                    JioCinemaBaseIE._REFRESH_TOKEN = refresh_token
                else:
                    self.report_warning(f'Invalid refresh_token value. Use {refresh_hint}')
            else:
                raise ExtractorError(
                    f'The password given could not be decoded as a token; use {self._ACCESS_HINT}', expected=True)

        elif login_method == 'device' and re.fullmatch(rf'(?:{UUID_RE}|\d+)', password):
            JioCinemaBaseIE._REFRESH_TOKEN = self.cache.load(JioCinemaBaseIE._NETRC_MACHINE, f'{password}-refresh')
            JioCinemaBaseIE._ACCESS_TOKEN = self.cache.load(JioCinemaBaseIE._NETRC_MACHINE, f'{password}-access')
            if not JioCinemaBaseIE._REFRESH_TOKEN or not JioCinemaBaseIE._ACCESS_TOKEN:
                raise ExtractorError(f'Failed to load cached tokens for device ID "{password}"', expected=True)

        elif login_method == 'phone' and re.fullmatch(r'\+?\d+', password):
            self._fetch_guest_token()
            guest_token = jwt_decode_hs256(self._GUEST_TOKEN)
            initial_data = {
                'number': base64.b64encode(password.encode()).decode(),
                **self._APP_VERSION,
            }
            response = self._call_login_api('send', guest_token, initial_data, 'Requesting OTP')
            if not traverse_obj(response, ('OTPInfo', {dict})):
                raise ExtractorError('There was a problem with the phone number login attempt')

            is_iphone = guest_token.get('os') == 'ios'
            response = self._call_login_api('verify', guest_token, {
                'deviceInfo': {
                    'consumptionDeviceName': 'iPhone' if is_iphone else 'Android',
                    'info': {
                        'platform': {'name': 'iPhone OS' if is_iphone else 'Android'},
                        'androidId': self._DEVICE_ID,
                        'type': 'iOS' if is_iphone else 'Android',
                    },
                },
                **initial_data,
                'otp': self._get_tfa_info('the one-time password sent to your phone'),
            }, 'Submitting OTP')
            if traverse_obj(response, 'code') == 1043:
                raise ExtractorError('Wrong OTP', expected=True)
            JioCinemaBaseIE._REFRESH_TOKEN = response['refreshToken']
            JioCinemaBaseIE._ACCESS_TOKEN = response['authToken']

        else:
            raise ExtractorError(self._LOGIN_HINT, expected=True)

        # The access token payload is the source of truth for user and device IDs
        user_token = jwt_decode_hs256(JioCinemaBaseIE._ACCESS_TOKEN)['data']
        JioCinemaBaseIE._USER_ID = user_token['userId']
        JioCinemaBaseIE._DEVICE_ID = user_token['deviceId']
        if JioCinemaBaseIE._REFRESH_TOKEN and login_method != 'device':
            # Fresh credentials: cache them so that "-u device" works next time
            self._cache_token('all')
            if self.get_param('cachedir') is not False:
                self.to_screen(
                    f'NOTE: For subsequent logins you can use "-u device -p {JioCinemaBaseIE._DEVICE_ID}"')
        elif not JioCinemaBaseIE._REFRESH_TOKEN:
            # Token login without a refresh token: try one cached for this device
            JioCinemaBaseIE._REFRESH_TOKEN = self.cache.load(
                JioCinemaBaseIE._NETRC_MACHINE, f'{JioCinemaBaseIE._DEVICE_ID}-refresh')
            if JioCinemaBaseIE._REFRESH_TOKEN:
                self._cache_token('access')
        self.to_screen(f'Logging in as device ID "{JioCinemaBaseIE._DEVICE_ID}"')
        if self._is_token_expired(JioCinemaBaseIE._ACCESS_TOKEN):
            self._refresh_token()
class JioCinemaIE(JioCinemaBaseIE):
    """Extractor for single JioCinema movies and TV show episodes."""
    IE_NAME = 'jiocinema'
    _VALID_URL = r'https?://(?:www\.)?jiocinema\.com/?(?:movies?/[^/?#]+/|tv-shows/(?:[^/?#]+/){3})(?P<id>\d{3,})'
    _TESTS = [{
        'url': 'https://www.jiocinema.com/tv-shows/agnisakshi-ek-samjhauta/1/pradeep-to-stop-the-wedding/3759931',
        'info_dict': {
            'id': '3759931',
            'ext': 'mp4',
            'title': 'Pradeep to stop the wedding?',
            'description': 'md5:75f72d1d1a66976633345a3de6d672b1',
            'episode': 'Pradeep to stop the wedding?',
            'episode_number': 89,
            'season': 'Agnisakshi…Ek Samjhauta-S1',
            'season_number': 1,
            'series': 'Agnisakshi Ek Samjhauta',
            'duration': 1238.0,
            'thumbnail': r're:https?://.+\.jpg',
            'age_limit': 13,
            'season_id': '3698031',
            'upload_date': '20230606',
            'timestamp': 1686009600,
            'release_date': '20230607',
            'genres': ['Drama'],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.jiocinema.com/movies/bhediya/3754021/watch',
        'info_dict': {
            'id': '3754021',
            'ext': 'mp4',
            'title': 'Bhediya',
            'description': 'md5:a6bf2900371ac2fc3f1447401a9f7bb0',
            'episode': 'Bhediya',
            'duration': 8500.0,
            'thumbnail': r're:https?://.+\.jpg',
            'age_limit': 13,
            'upload_date': '20230525',
            'timestamp': 1685026200,
            'release_date': '20230524',
            'genres': ['Comedy'],
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _extract_formats_and_subtitles(self, playback, video_id):
        """Build the 'formats' and 'subtitles' entries from the playback JSON.

        Uses the first valid URL among the playback URLs whose `streamtype`
        is 'hls'. If there is none, the video is reported as DRM protected.
        """
        m3u8_url = traverse_obj(playback, (
            'data', 'playbackUrls', lambda _, v: v['streamtype'] == 'hls', 'url', {url_or_none}, any))
        if not m3u8_url:  # DRM-only content only serves dash urls
            self.report_drm(video_id)
            # report_drm() is expected to raise; should it return instead
            # (presumably when the user allows unplayable formats - confirm
            # against InfoExtractor), do not hand a missing URL to the m3u8 parser
            return {'formats': [], 'subtitles': {}}
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, m3u8_id='hls')
        self._remove_duplicate_formats(formats)

        return {
            # '/_definst_/smil:vod/' m3u8 manifests claim to have 720p+ formats but max out at 480p
            'formats': traverse_obj(formats, (
                lambda _, v: '/_definst_/smil:vod/' not in v['url'] or v['height'] <= 480)),
            'subtitles': subtitles,
        }

    def _real_extract(self, url):
        """Fetch the playback and metadata JSON for the video and build its info dict.

        Raises a geo-restriction error on API code 474, a login-required or
        premium-only error on code 1008, and ExtractorError on any other
        code that is not 200.
        """
        video_id = self._match_id(url)
        # Make sure a usable token exists: a guest token when not logged in,
        # otherwise a non-expired access token
        if not self._ACCESS_TOKEN and self._is_token_expired(self._GUEST_TOKEN):
            self._fetch_guest_token()
        elif self._ACCESS_TOKEN and self._is_token_expired(self._ACCESS_TOKEN):
            self._refresh_token()

        playback = self._call_api(
            f'https://apis-jiovoot.voot.com/playbackjv/v3/{video_id}', video_id,
            'Downloading playback JSON', headers={
                **self.geo_verification_headers(),
                'accesstoken': self._ACCESS_TOKEN or self._GUEST_TOKEN,
                **self._APP_NAME,
                'deviceid': self._DEVICE_ID,
                'uniqueid': self._USER_ID,
                'x-apisignatures': self._API_SIGNATURES,
                'x-platform': 'androidweb',
                'x-platform-token': 'web',
            }, data={
                '4k': False,
                'ageGroup': '18+',
                'appVersion': '3.4.0',
                'bitrateProfile': 'xhdpi',
                'capability': {
                    'drmCapability': {
                        'aesSupport': 'yes',
                        'fairPlayDrmSupport': 'none',
                        'playreadyDrmSupport': 'none',
                        'widevineDRMSupport': 'none',
                    },
                    'frameRateCapability': [{
                        'frameRateSupport': '30fps',
                        'videoQuality': '1440p',
                    }],
                },
                'continueWatchingRequired': False,
                'dolby': False,
                'downloadRequest': False,
                'hevc': False,
                'kidsSafe': False,
                'manufacturer': 'Windows',
                'model': 'Windows',
                'multiAudioRequired': True,
                'osVersion': '10',
                'parentalPinValid': True,
                'x-apisignatures': self._API_SIGNATURES,
            })

        # The API reports failures through a `code` field in the JSON body
        status_code = traverse_obj(playback, ('code', {int}))
        if status_code == 474:
            self.raise_geo_restricted(countries=['IN'])
        elif status_code == 1008:
            error_msg = 'This content is only available for premium users'
            if self._ACCESS_TOKEN:
                # Already logged in, so logging in again would not help
                raise ExtractorError(error_msg, expected=True)
            self.raise_login_required(f'{error_msg}. {self._LOGIN_HINT}', method=None)
        elif status_code == 400:
            raise ExtractorError('The requested content is not available', expected=True)
        elif status_code is not None and status_code != 200:
            raise ExtractorError(
                f'JioCinema says: {traverse_obj(playback, ("message", {str})) or status_code}')

        # Non-fatal: the playback JSON provides fallback metadata if this fails
        metadata = self._download_json(
            f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/query/asset-details',
            video_id, fatal=False, query={
                'ids': f'include:{video_id}',
                'responseType': 'common',
                'devicePlatformType': 'desktop',
            })

        return {
            'id': video_id,
            'http_headers': self._API_HEADERS,
            **self._extract_formats_and_subtitles(playback, video_id),
            **traverse_obj(playback, ('data', {
                # fallback metadata
                'title': ('name', {str}),
                'description': ('fullSynopsis', {str}),
                'series': ('show', 'name', {str}, filter),
                'season': ('tournamentName', {str}, {lambda x: x if x != 'Season 0' else None}),
                'season_number': ('episode', 'season', {int_or_none}, filter),
                'episode': ('fullTitle', {str}),
                'episode_number': ('episode', 'episodeNo', {int_or_none}, filter),
                'age_limit': ('ageNemonic', {parse_age_limit}),
                'duration': ('totalDuration', {float_or_none}),
                'thumbnail': ('images', {url_or_none}),
            })),
            # Unpacked last so that these values override the fallback ones above
            **traverse_obj(metadata, ('result', 0, {
                'title': ('fullTitle', {str}),
                'description': ('fullSynopsis', {str}),
                'series': ('showName', {str}, filter),
                'season': ('seasonName', {str}, filter),
                'season_number': ('season', {int_or_none}),
                'season_id': ('seasonId', {str}, filter),
                'episode': ('fullTitle', {str}),
                'episode_number': ('episode', {int_or_none}),
                'timestamp': ('uploadTime', {int_or_none}),
                'release_date': ('telecastDate', {str}),
                'age_limit': ('ageNemonic', {parse_age_limit}),
                'duration': ('duration', {float_or_none}),
                'genres': ('genres', ..., {str}),
                'thumbnail': ('seo', 'ogImage', {url_or_none}),
            })),
        }
class JioCinemaSeriesIE(JioCinemaBaseIE):
    """Playlist extractor yielding every episode of a JioCinema TV show."""
    IE_NAME = 'jiocinema:series'
    _VALID_URL = r'https?://(?:www\.)?jiocinema\.com/tv-shows/(?P<slug>[\w-]+)/(?P<id>\d{3,})'
    _TESTS = [{
        'url': 'https://www.jiocinema.com/tv-shows/naagin/3499917',
        'info_dict': {
            'id': '3499917',
            'title': 'naagin',
        },
        'playlist_mincount': 120,
    }, {
        'url': 'https://www.jiocinema.com/tv-shows/mtv-splitsvilla-x5/3499820',
        'info_dict': {
            'id': '3499820',
            'title': 'mtv-splitsvilla-x5',
        },
        'playlist_mincount': 310,
    }]

    def _entries(self, series_id):
        """Yield a url_result for each episode, season by season and page by page.

        Seasons are the `trayTabs` (those with an `id`) of the show's
        'season-by-show-multifilter' tray. Paging of a season stops at the
        first page that yields no usable episode.
        """
        show_json = self._download_json(
            f'{self._METADATA_API_BASE}/voot/v1/voot-web/view/show/{series_id}', series_id,
            'Downloading series metadata JSON', query={'responseType': 'common'})
        season_tabs = traverse_obj(show_json, (
            'trays', lambda _, v: v['trayId'] == 'season-by-show-multifilter',
            'trayTabs', lambda _, v: v['id']))

        episodes_endpoint = f'{self._METADATA_API_BASE}/voot/v1/voot-web/content/generic/series-wise-episode'
        for position, tab in enumerate(season_tabs, start=1):
            season_id = tab['id']
            # Fall back to the running season index when the tab has no label
            season_label = tab.get('label') or position
            for page in itertools.count(1):
                page_json = self._download_json(
                    episodes_endpoint, season_id,
                    f'Downloading season {season_label} page {page} JSON', query={
                        'sort': 'episode:asc',
                        'id': season_id,
                        'responseType': 'common',
                        'page': page,
                    })
                # Keep only results that have an ID and a slug that is a valid URL
                page_episodes = traverse_obj(
                    page_json, ('result', lambda _, v: v['id'] and url_or_none(v['slug'])))
                if not page_episodes:
                    break
                for item in page_episodes:
                    extra = traverse_obj(item, {
                        'video_id': 'id',
                        'video_title': ('fullTitle', {str}),
                        'season_number': ('season', {int_or_none}),
                        'episode_number': ('episode', {int_or_none}),
                    })
                    yield self.url_result(item['slug'], JioCinemaIE, **extra)

    def _real_extract(self, url):
        """Return a playlist of the show's episodes, titled with the URL slug."""
        mobj = self._match_valid_url(url)
        series_id = mobj.group('id')
        return self.playlist_result(self._entries(series_id), series_id, mobj.group('slug'))