yt_dlp/extractor/sonyliv.py

   1 import datetime as dt
   2 import itertools
   3 import json
   4 import math
   5 import random
   6 import time
   7 import uuid
   8
   9 from .common import InfoExtractor
  10 from ..networking.exceptions import HTTPError
  11 from ..utils import (
  12     ExtractorError,
  13     int_or_none,
  14     jwt_decode_hs256,
  15     try_call,
  16 )
  17 from ..utils.traversal import traverse_obj
  18
  19
  20 class SonyLIVIE(InfoExtractor):
  21     _VALID_URL = r'''(?x)
  22                      (?:
  23                         sonyliv:|
  24                         https?://(?:www\.)?sonyliv\.com/(?:s(?:how|port)s/[^/]+|movies|clip|trailer|music-videos)/[^/?#&]+-
  25                     )
  26                     (?P<id>\d+)
  27                   '''
  28     _TESTS = [{
  29         'url': 'https://www.sonyliv.com/shows/bachelors-delight-1700000113/achaari-cheese-toast-1000022678?watch=true',
  30         'info_dict': {
  31             'title': 'Achaari Cheese Toast',
  32             'id': '1000022678',
  33             'ext': 'mp4',
  34             'upload_date': '20200411',
  35             'description': 'md5:3957fa31d9309bf336ceb3f37ad5b7cb',
  36             'timestamp': 1586632091,
  37             'duration': 185,
  38             'season_number': 1,
  39             'series': 'Bachelors Delight',
  40             'episode_number': 1,
  41             'release_year': 2016,
  42         },
  43         'params': {
  44             'skip_download': True,
  45         },
  46     }, {
  47         'url': 'https://www.sonyliv.com/movies/tahalka-1000050121?watch=true',
  48         'only_matching': True,
  49     }, {
  50         'url': 'https://www.sonyliv.com/clip/jigarbaaz-1000098925',
  51         'only_matching': True,
  52     }, {
  53         'url': 'https://www.sonyliv.com/trailer/sandwiched-forever-1000100286?watch=true',
  54         'only_matching': True,
  55     }, {
  56         'url': 'https://www.sonyliv.com/sports/india-tour-of-australia-2020-21-1700000286/cricket-hls-day-3-1st-test-aus-vs-ind-19-dec-2020-1000100959?watch=true',
  57         'only_matching': True,
  58     }, {
  59         'url': 'https://www.sonyliv.com/music-videos/yeh-un-dinon-ki-baat-hai-1000018779',
  60         'only_matching': True,
  61     }]
  62     _GEO_COUNTRIES = ['IN']
  63     _HEADERS = {}
  64     _LOGIN_HINT = 'Use "--username <mobile_number>" to login using OTP or "--username token --password <auth_token>" to login using auth token.'
  65     _NETRC_MACHINE = 'sonyliv'
  66
  67     def _get_device_id(self):
  68         e = int(time.time() * 1000)
  69         t = list('xxxxxxxxxxxx4xxxyxxxxxxxxxxxxxxx')
  70         for i, c in enumerate(t):
  71             n = int((e + 16 * random.random()) % 16) | 0
  72             e = math.floor(e / 16)
  73             if c == 'x':
  74                 t[i] = str(n)
  75             elif c == 'y':
  76                 t[i] = f'{3 & n | 8:x}'
  77         return ''.join(t) + '-' + str(int(time.time() * 1000))
  78
  79     def _perform_login(self, username, password):
  80         self._HEADERS['device_id'] = self._get_device_id()
  81         self._HEADERS['content-type'] = 'application/json'
  82
  83         if username.lower() == 'token' and try_call(lambda: jwt_decode_hs256(password)):
  84             self._HEADERS['authorization'] = password
  85             self.report_login()
  86             return
  87         elif len(username) != 10 or not username.isdigit():
  88             raise ExtractorError(f'Invalid username/password; {self._LOGIN_HINT}')
  89
  90         self.report_login()
  91         otp_request_json = self._download_json(
  92             'https://apiv2.sonyliv.com/AGL/1.6/A/ENG/WEB/IN/HR/CREATEOTP-V2',
  93             None, note='Sending OTP', headers=self._HEADERS, data=json.dumps({
  94                 'mobileNumber': username,
  95                 'channelPartnerID': 'MSMIND',
  96                 'country': 'IN',
  97                 'timestamp': dt.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%MZ'),
  98                 'otpSize': 6,
  99                 'loginType': 'REGISTERORSIGNIN',
 100                 'isMobileMandatory': True,
 101             }).encode())
 102         if otp_request_json['resultCode'] == 'KO':
 103             raise ExtractorError(otp_request_json['message'], expected=True)
 104
 105         otp_verify_json = self._download_json(
 106             'https://apiv2.sonyliv.com/AGL/2.0/A/ENG/WEB/IN/HR/CONFIRMOTP-V2',
 107             None, note='Verifying OTP', headers=self._HEADERS, data=json.dumps({
 108                 'channelPartnerID': 'MSMIND',
 109                 'mobileNumber': username,
 110                 'country': 'IN',
 111                 'otp': self._get_tfa_info('OTP'),
 112                 'dmaId': 'IN',
 113                 'ageConfirmation': True,
 114                 'timestamp': dt.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%MZ'),
 115                 'isMobileMandatory': True,
 116             }).encode())
 117         if otp_verify_json['resultCode'] == 'KO':
 118             raise ExtractorError(otp_request_json['message'], expected=True)
 119         self._HEADERS['authorization'] = otp_verify_json['resultObj']['accessToken']
 120
 121     def _call_api(self, version, path, video_id):
 122         try:
 123             return self._download_json(
 124                 f'https://apiv2.sonyliv.com/AGL/{version}/A/ENG/WEB/{path}',
 125                 video_id, headers=self._HEADERS)['resultObj']
 126         except ExtractorError as e:
 127             if isinstance(e.cause, HTTPError) and e.cause.status == 406 and self._parse_json(
 128                     e.cause.response.read().decode(), video_id)['message'] == 'Please subscribe to watch this content':
 129                 self.raise_login_required(self._LOGIN_HINT, method=None)
 130             if isinstance(e.cause, HTTPError) and e.cause.status == 403:
 131                 message = self._parse_json(
 132                     e.cause.response.read().decode(), video_id)['message']
 133                 if message == 'Geoblocked Country':
 134                     self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
 135                 raise ExtractorError(message)
 136             raise
 137
 138     def _initialize_pre_login(self):
 139         self._HEADERS['security_token'] = self._call_api('1.4', 'ALL/GETTOKEN', None)
 140
 141     def _real_extract(self, url):
 142         video_id = self._match_id(url)
 143         content = self._call_api(
 144             '1.5', 'IN/CONTENT/VIDEOURL/VOD/' + video_id, video_id)
 145         if not self.get_param('allow_unplayable_formats') and content.get('isEncrypted'):
 146             self.report_drm(video_id)
 147         dash_url = content['videoURL']
 148         headers = {
 149             'x-playback-session-id': '%s-%d' % (uuid.uuid4().hex, time.time() * 1000),
 150         }
 151         formats = self._extract_mpd_formats(
 152             dash_url, video_id, mpd_id='dash', headers=headers, fatal=False)
 153         formats.extend(self._extract_m3u8_formats(
 154             dash_url.replace('.mpd', '.m3u8').replace('/DASH/', '/HLS/'),
 155             video_id, 'mp4', m3u8_id='hls', headers=headers, fatal=False))
 156         for f in formats:
 157             f.setdefault('http_headers', {}).update(headers)
 158
 159         metadata = self._call_api(
 160             '1.6', 'IN/DETAIL/' + video_id, video_id)['containers'][0]['metadata']
 161         title = metadata['episodeTitle']
 162         subtitles = {}
 163         for sub in content.get('subtitle', []):
 164             sub_url = sub.get('subtitleUrl')
 165             if not sub_url:
 166                 continue
 167             subtitles.setdefault(sub.get('subtitleLanguageName', 'ENG'), []).append({
 168                 'url': sub_url,
 169             })
 170         return {
 171             'id': video_id,
 172             'title': title,
 173             'formats': formats,
 174             'thumbnail': content.get('posterURL'),
 175             'description': metadata.get('longDescription') or metadata.get('shortDescription'),
 176             'timestamp': int_or_none(metadata.get('creationDate'), 1000),
 177             'duration': int_or_none(metadata.get('duration')),
 178             'season_number': int_or_none(metadata.get('season')),
 179             'series': metadata.get('title'),
 180             'episode_number': int_or_none(metadata.get('episodeNumber')),
 181             'release_year': int_or_none(metadata.get('year')),
 182             'subtitles': subtitles,
 183         }
 184
 185
 186 class SonyLIVSeriesIE(InfoExtractor):
 187     _VALID_URL = r'https?://(?:www\.)?sonyliv\.com/shows/[^/?#&]+-(?P<id>\d{10})/?(?:$|[?#])'
 188     _TESTS = [{
 189         'url': 'https://www.sonyliv.com/shows/adaalat-1700000091',
 190         'playlist_mincount': 452,
 191         'info_dict': {
 192             'id': '1700000091',
 193         },
 194     }, {
 195         'url': 'https://www.sonyliv.com/shows/beyhadh-1700000007/',
 196         'playlist_mincount': 358,
 197         'info_dict': {
 198             'id': '1700000007',
 199         },
 200     }]
 201     _API_BASE = 'https://apiv2.sonyliv.com/AGL'
 202
 203     def _entries(self, show_id):
 204         headers = {
 205             'Accept': 'application/json, text/plain, */*',
 206             'Referer': 'https://www.sonyliv.com',
 207         }
 208         headers['security_token'] = self._download_json(
 209             f'{self._API_BASE}/1.4/A/ENG/WEB/ALL/GETTOKEN', show_id,
 210             'Downloading security token', headers=headers)['resultObj']
 211         seasons = traverse_obj(self._download_json(
 212             f'{self._API_BASE}/1.9/R/ENG/WEB/IN/DL/DETAIL/{show_id}', show_id,
 213             'Downloading series JSON', headers=headers, query={
 214                 'kids_safe': 'false',
 215                 'from': '0',
 216                 'to': '49',
 217             }), ('resultObj', 'containers', 0, 'containers', lambda _, v: int_or_none(v['id'])))
 218         for season in seasons:
 219             season_id = str(season['id'])
 220             note = traverse_obj(season, ('metadata', 'title', {str})) or 'season'
 221             cursor = 0
 222             for page_num in itertools.count(1):
 223                 episodes = traverse_obj(self._download_json(
 224                     f'{self._API_BASE}/1.4/R/ENG/WEB/IN/CONTENT/DETAIL/BUNDLE/{season_id}',
 225                     season_id, f'Downloading {note} page {page_num} JSON', headers=headers, query={
 226                         'from': str(cursor),
 227                         'to': str(cursor + 99),
 228                         'orderBy': 'episodeNumber',
 229                         'sortOrder': 'asc',
 230                     }), ('resultObj', 'containers', 0, 'containers', lambda _, v: int_or_none(v['id'])))
 231                 if not episodes:
 232                     break
 233                 for episode in episodes:
 234                     video_id = str(episode['id'])
 235                     yield self.url_result(f'sonyliv:{video_id}', SonyLIVIE, video_id)
 236                 cursor += 100
 237
 238     def _real_extract(self, url):
 239         show_id = self._match_id(url)
 240         return self.playlist_result(self._entries(show_id), playlist_id=show_id)