yt_dlp/extractor/vvvvid.py

   1 import functools
   2 import re
   3
   4 from .common import InfoExtractor
   5 from .youtube import YoutubeIE
   6 from ..utils import (
   7     ExtractorError,
   8     int_or_none,
   9     str_or_none,
  10 )
  11
  12
  13 class VVVVIDIE(InfoExtractor):
  14     _VALID_URL_BASE = r'https?://(?:www\.)?vvvvid\.it/(?:#!)?(?:show|anime|film|series)/'
  15     _VALID_URL = rf'{_VALID_URL_BASE}(?P<show_id>\d+)/[^/]+/(?P<season_id>\d+)/(?P<id>[0-9]+)'
  16     _TESTS = [{
  17         # video_type == 'video/vvvvid'
  18         'url': 'https://www.vvvvid.it/show/498/the-power-of-computing/518/505692/playstation-vr-cambiera-il-nostro-modo-di-giocare',
  19         'info_dict': {
  20             'id': '505692',
  21             'ext': 'mp4',
  22             'title': 'Playstation VR cambierà il nostro modo di giocare',
  23             'duration': 93,
  24             'series': 'The Power of Computing',
  25             'season_id': '518',
  26             'episode': 'Playstation VR cambierà il nostro modo di giocare',
  27             'episode_id': '4747',
  28             'view_count': int,
  29             'like_count': int,
  30             'repost_count': int,
  31             'thumbnail': 'https://static.vvvvid.it/img/zoomin/28CA2409-E663-34F0-2B02E72356556EA3_500k.jpg',
  32         },
  33         'params': {
  34             'skip_download': True,
  35         },
  36     }, {
  37         # video_type == 'video/rcs'
  38         'url': 'https://www.vvvvid.it/#!show/376/death-note-live-action/377/482493/episodio-01',
  39         'info_dict': {
  40             'id': '482493',
  41             'ext': 'mp4',
  42             'title': 'Episodio 01',
  43         },
  44         'params': {
  45             'skip_download': True,
  46         },
  47         'skip': 'Every video/rcs is not working even in real website',
  48     }, {
  49         # video_type == 'video/youtube'
  50         'url': 'https://www.vvvvid.it/show/404/one-punch-man/406/486683/trailer',
  51         'md5': '33e0edfba720ad73a8782157fdebc648',
  52         'info_dict': {
  53             'id': 'RzmFKUDOUgw',
  54             'ext': 'mp4',
  55             'title': 'Trailer',
  56             'upload_date': '20150906',
  57             'description': 'md5:a5e802558d35247fee285875328c0b80',
  58             'uploader_id': '@EMOTIONLabelChannel',
  59             'uploader': 'EMOTION Label Channel',
  60             'episode_id': '3115',
  61             'view_count': int,
  62             'like_count': int,
  63             'repost_count': int,
  64             'availability': str,
  65             'categories': list,
  66             'age_limit': 0,
  67             'channel': 'EMOTION Label Channel',
  68             'channel_follower_count': int,
  69             'channel_id': 'UCQ5URCSs1f5Cz9rh-cDGxNQ',
  70             'channel_url': 'https://www.youtube.com/channel/UCQ5URCSs1f5Cz9rh-cDGxNQ',
  71             'comment_count': int,
  72             'duration': 133,
  73             'episode': 'Trailer',
  74             'heatmap': list,
  75             'live_status': 'not_live',
  76             'playable_in_embed': True,
  77             'season_id': '406',
  78             'series': 'One-Punch Man',
  79             'tags': list,
  80             'uploader_url': 'https://www.youtube.com/@EMOTIONLabelChannel',
  81             'thumbnail': 'https://i.ytimg.com/vi/RzmFKUDOUgw/maxresdefault.jpg',
  82         },
  83         'params': {
  84             'skip_download': True,
  85         },
  86     }, {
  87         # video_type == 'video/dash'
  88         'url': 'https://www.vvvvid.it/show/844/le-bizzarre-avventure-di-jojo-vento-aureo/938/527551/golden-wind',
  89         'info_dict': {
  90             'id': '527551',
  91             'ext': 'mp4',
  92             'title': 'Golden Wind',
  93             'duration': 1430,
  94             'series': 'Le bizzarre avventure di Jojo - Vento Aureo',
  95             'season_id': '938',
  96             'episode': 'Golden Wind',
  97             'episode_number': 1,
  98             'episode_id': '9089',
  99             'view_count': int,
 100             'like_count': int,
 101             'repost_count': int,
 102             'thumbnail': 'https://static.vvvvid.it/img/thumbs/Dynit/Jojo/Jojo_S05Ep01-t.jpg',
 103             'season': 'Season 5',
 104             'season_number': 5,
 105         },
 106         'params': {
 107             'skip_download': True,
 108             'format': 'mp4',
 109         },
 110     }, {
 111         'url': 'https://www.vvvvid.it/show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048',
 112         'only_matching': True,
 113     }]
 114     _conn_id = None
 115
 116     @functools.cached_property
 117     def _headers(self):
 118         return {
 119             **self.geo_verification_headers(),
 120             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.50 Safari/537.37',
 121         }
 122
 123     def _real_initialize(self):
 124         self._conn_id = self._download_json(
 125             'https://www.vvvvid.it/user/login',
 126             None, headers=self._headers)['data']['conn_id']
 127
 128     def _download_info(self, show_id, path, video_id, fatal=True, query=None):
 129         q = {
 130             'conn_id': self._conn_id,
 131         }
 132         if query:
 133             q.update(query)
 134         response = self._download_json(
 135             f'https://www.vvvvid.it/vvvvid/ondemand/{show_id}/{path}',
 136             video_id, headers=self._headers, query=q, fatal=fatal)
 137         if not (response or fatal):
 138             return
 139         if response.get('result') == 'error':
 140             raise ExtractorError('{} said: {}'.format(
 141                 self.IE_NAME, response['message']), expected=True)
 142         return response['data']
 143
 144     def _extract_common_video_info(self, video_data):
 145         return {
 146             'thumbnail': video_data.get('thumbnail'),
 147             'episode_id': str_or_none(video_data.get('id')),
 148         }
 149
 150     def _real_extract(self, url):
 151         show_id, season_id, video_id = self._match_valid_url(url).groups()
 152
 153         response = self._download_info(
 154             show_id, f'season/{season_id}',
 155             video_id, query={'video_id': video_id})
 156
 157         vid = int(video_id)
 158         video_data = next(filter(
 159             lambda episode: episode.get('video_id') == vid, response))
 160         title = video_data['title']
 161         formats = []
 162
 163         # vvvvid embed_info decryption algorithm is reverse engineered from function $ds(h) at vvvvid.js
 164         def ds(h):
 165             g = 'MNOPIJKL89+/4567UVWXQRSTEFGHABCDcdefYZabstuvopqr0123wxyzklmnghij'
 166
 167             def f(m):
 168                 l = []
 169                 o = 0
 170                 b = False
 171                 m_len = len(m)
 172                 while ((not b) and o < m_len):
 173                     n = m[o] << 2
 174                     o += 1
 175                     k = -1
 176                     j = -1
 177                     if o < m_len:
 178                         n += m[o] >> 4
 179                         o += 1
 180                         if o < m_len:
 181                             k = (m[o - 1] << 4) & 255
 182                             k += m[o] >> 2
 183                             o += 1
 184                             if o < m_len:
 185                                 j = (m[o - 1] << 6) & 255
 186                                 j += m[o]
 187                                 o += 1
 188                             else:
 189                                 b = True
 190                         else:
 191                             b = True
 192                     else:
 193                         b = True
 194                     l.append(n)
 195                     if k != -1:
 196                         l.append(k)
 197                     if j != -1:
 198                         l.append(j)
 199                 return l
 200
 201             c = []
 202             for e in h:
 203                 c.append(g.index(e))
 204
 205             c_len = len(c)
 206             for e in range(c_len * 2 - 1, -1, -1):
 207                 a = c[e % c_len] ^ c[(e + 1) % c_len]
 208                 c[e % c_len] = a
 209
 210             c = f(c)
 211             d = ''
 212             for e in c:
 213                 d += chr(e)
 214
 215             return d
 216
 217         info = {}
 218
 219         def metadata_from_url(r_url):
 220             if not info and r_url:
 221                 mobj = re.search(r'_(?:S(\d+))?Ep(\d+)', r_url)
 222                 if mobj:
 223                     info['episode_number'] = int(mobj.group(2))
 224                     season_number = mobj.group(1)
 225                     if season_number:
 226                         info['season_number'] = int(season_number)
 227
 228         video_type = video_data.get('video_type')
 229         is_youtube = False
 230         for quality in ('', '_sd'):
 231             embed_code = video_data.get('embed_info' + quality)
 232             if not embed_code:
 233                 continue
 234             embed_code = ds(embed_code)
 235             if video_type == 'video/kenc':
 236                 embed_code = re.sub(r'https?(://[^/]+)/z/', r'https\1/i/', embed_code).replace('/manifest.f4m', '/master.m3u8')
 237                 kenc = self._download_json(
 238                     'https://www.vvvvid.it/kenc', video_id, query={
 239                         'action': 'kt',
 240                         'conn_id': self._conn_id,
 241                         'url': embed_code,
 242                     }, fatal=False) or {}
 243                 kenc_message = kenc.get('message')
 244                 if kenc_message:
 245                     embed_code += '?' + ds(kenc_message)
 246                 formats.extend(self._extract_m3u8_formats(
 247                     embed_code, video_id, 'mp4', m3u8_id='hls', fatal=False))
 248             elif video_type == 'video/rcs':
 249                 formats.extend(self._extract_akamai_formats(embed_code, video_id))
 250             elif video_type == 'video/youtube':
 251                 info.update({
 252                     '_type': 'url_transparent',
 253                     'ie_key': YoutubeIE.ie_key(),
 254                     'url': embed_code,
 255                 })
 256                 is_youtube = True
 257                 break
 258             elif video_type == 'video/dash':
 259                 formats.extend(self._extract_m3u8_formats(
 260                     embed_code, video_id, 'mp4', m3u8_id='hls', fatal=False))
 261             else:
 262                 formats.extend(self._extract_wowza_formats(
 263                     f'http://sb.top-ix.org/videomg/_definst_/mp4:{embed_code}/playlist.m3u8', video_id, skip_protocols=['f4m']))
 264             metadata_from_url(embed_code)
 265
 266         if not is_youtube:
 267             info['formats'] = formats
 268
 269         metadata_from_url(video_data.get('thumbnail'))
 270         info.update(self._extract_common_video_info(video_data))
 271         info.update({
 272             'id': video_id,
 273             'title': title,
 274             'duration': int_or_none(video_data.get('length')),
 275             'series': video_data.get('show_title'),
 276             'season_id': season_id,
 277             'episode': title,
 278             'view_count': int_or_none(video_data.get('views')),
 279             'like_count': int_or_none(video_data.get('video_likes')),
 280             'repost_count': int_or_none(video_data.get('video_shares')),
 281         })
 282         return info
 283
 284
 285 class VVVVIDShowIE(VVVVIDIE):  # XXX: Do not subclass from concrete IE
 286     _VALID_URL = rf'(?P<base_url>{VVVVIDIE._VALID_URL_BASE}(?P<id>\d+)(?:/(?P<show_title>[^/?&#]+))?)/?(?:[?#&]|$)'
 287     _TESTS = [{
 288         'url': 'https://www.vvvvid.it/show/156/psyco-pass',
 289         'info_dict': {
 290             'id': '156',
 291             'title': 'Psycho-Pass',
 292             'description': 'md5:94d572c0bd85894b193b8aebc9a3a806',
 293         },
 294         'playlist_count': 46,
 295     }, {
 296         'url': 'https://www.vvvvid.it/show/156',
 297         'only_matching': True,
 298     }]
 299
 300     def _real_extract(self, url):
 301         base_url, show_id, show_title = self._match_valid_url(url).groups()
 302
 303         seasons = self._download_info(
 304             show_id, 'seasons/', show_title)
 305
 306         show_info = self._download_info(
 307             show_id, 'info/', show_title, fatal=False)
 308
 309         if not show_title:
 310             base_url += '/title'
 311
 312         entries = []
 313         for season in (seasons or []):
 314             episodes = season.get('episodes') or []
 315             playlist_title = season.get('name') or show_info.get('title')
 316             for episode in episodes:
 317                 if episode.get('playable') is False:
 318                     continue
 319                 season_id = str_or_none(episode.get('season_id'))
 320                 video_id = str_or_none(episode.get('video_id'))
 321                 if not (season_id and video_id):
 322                     continue
 323                 info = self._extract_common_video_info(episode)
 324                 info.update({
 325                     '_type': 'url_transparent',
 326                     'ie_key': VVVVIDIE.ie_key(),
 327                     'url': '/'.join([base_url, season_id, video_id]),
 328                     'title': episode.get('title'),
 329                     'description': episode.get('description'),
 330                     'season_id': season_id,
 331                     'playlist_title': playlist_title,
 332                 })
 333                 entries.append(info)
 334
 335         return self.playlist_result(
 336             entries, show_id, show_info.get('title'), show_info.get('description'))