yt_dlp/extractor/daftsex.py

   1 from .common import InfoExtractor
   2 from ..compat import compat_b64decode
   3 from ..utils import (
   4     ExtractorError,
   5     int_or_none,
   6     js_to_json,
   7     parse_count,
   8     parse_duration,
   9     traverse_obj,
  10     try_get,
  11     unified_timestamp,
  12 )
  13
  14
  15 class DaftsexIE(InfoExtractor):
  16     _VALID_URL = r'https?://(?:www\.)?daft\.sex/watch/(?P<id>-?\d+_\d+)'
  17     _TESTS = [{
  18         'url': 'https://daft.sex/watch/-35370899_456246186',
  19         'md5': '64c04ef7b4c7b04b308f3b0c78efe7cd',
  20         'info_dict': {
  21             'id': '-35370899_456246186',
  22             'ext': 'mp4',
  23             'title': 'just relaxing',
  24             'description': 'just relaxing – Watch video Watch video in high quality',
  25             'upload_date': '20201113',
  26             'timestamp': 1605261911,
  27             'thumbnail': r're:^https?://.*\.jpg$',
  28             'age_limit': 18,
  29             'duration': 15.0,
  30             'view_count': int
  31         },
  32     }, {
  33         'url': 'https://daft.sex/watch/-156601359_456242791',
  34         'info_dict': {
  35             'id': '-156601359_456242791',
  36             'ext': 'mp4',
  37             'title': 'Skye Blue - Dinner And A Show',
  38             'description': 'Skye Blue - Dinner And A Show - Watch video Watch video in high quality',
  39             'upload_date': '20200916',
  40             'timestamp': 1600250735,
  41             'thumbnail': 'https://psv153-1.crazycloud.ru/videos/-156601359/456242791/thumb.jpg?extra=i3D32KaBbBFf9TqDRMAVmQ',
  42         },
  43         'skip': 'deleted / private'
  44     }]
  45
  46     def _real_extract(self, url):
  47         video_id = self._match_id(url)
  48         webpage = self._download_webpage(url, video_id)
  49         title = self._html_search_meta('name', webpage, 'title')
  50         timestamp = unified_timestamp(self._html_search_meta('uploadDate', webpage, 'Upload Date', default=None))
  51         description = self._html_search_meta('description', webpage, 'Description', default=None)
  52
  53         duration = parse_duration(self._search_regex(
  54             r'Duration: ((?:[0-9]{2}:){0,2}[0-9]{2})',
  55             webpage, 'duration', fatal=False))
  56         views = parse_count(self._search_regex(
  57             r'Views: ([0-9 ]+)',
  58             webpage, 'views', fatal=False))
  59
  60         player_hash = self._search_regex(
  61             r'DaxabPlayer\.Init\({[\s\S]*hash:\s*"([0-9a-zA-Z_\-]+)"[\s\S]*}',
  62             webpage, 'player hash')
  63         player_color = self._search_regex(
  64             r'DaxabPlayer\.Init\({[\s\S]*color:\s*"([0-9a-z]+)"[\s\S]*}',
  65             webpage, 'player color', fatal=False) or ''
  66
  67         embed_page = self._download_webpage(
  68             'https://dxb.to/player/%s?color=%s' % (player_hash, player_color),
  69             video_id, headers={'Referer': url})
  70         video_params = self._parse_json(
  71             self._search_regex(
  72                 r'window\.globParams\s*=\s*({[\S\s]+})\s*;\s*<\/script>',
  73                 embed_page, 'video parameters'),
  74             video_id, transform_source=js_to_json)
  75
  76         server_domain = 'https://%s' % compat_b64decode(video_params['server'][::-1]).decode('utf-8')
  77
  78         cdn_files = traverse_obj(video_params, ('video', 'cdn_files')) or {}
  79         if cdn_files:
  80             formats = []
  81             for format_id, format_data in cdn_files.items():
  82                 ext, height = format_id.split('_')
  83                 formats.append({
  84                     'format_id': format_id,
  85                     'url': f'{server_domain}/videos/{video_id.replace("_", "/")}/{height}.mp4?extra={format_data.split(".")[-1]}',
  86                     'height': int_or_none(height),
  87                     'ext': ext,
  88                 })
  89
  90             return {
  91                 'id': video_id,
  92                 'title': title,
  93                 'formats': formats,
  94                 'description': description,
  95                 'duration': duration,
  96                 'thumbnail': try_get(video_params, lambda vi: 'https:' + compat_b64decode(vi['video']['thumb']).decode('utf-8')),
  97                 'timestamp': timestamp,
  98                 'view_count': views,
  99                 'age_limit': 18,
 100             }
 101
 102         items = self._download_json(
 103             f'{server_domain}/method/video.get/{video_id}', video_id,
 104             headers={'Referer': url}, query={
 105                 'token': video_params['video']['access_token'],
 106                 'videos': video_id,
 107                 'ckey': video_params['c_key'],
 108                 'credentials': video_params['video']['credentials'],
 109             })['response']['items']
 110
 111         if not items:
 112             raise ExtractorError('Video is not available', video_id=video_id, expected=True)
 113
 114         item = items[0]
 115         formats = []
 116         for f_id, f_url in item.get('files', {}).items():
 117             if f_id == 'external':
 118                 return self.url_result(f_url)
 119             ext, height = f_id.split('_')
 120             height_extra_key = traverse_obj(video_params, ('video', 'partial', 'quality', height))
 121             if height_extra_key:
 122                 formats.append({
 123                     'format_id': f'{height}p',
 124                     'url': f'{server_domain}/{f_url[8:]}&videos={video_id}&extra_key={height_extra_key}',
 125                     'height': int_or_none(height),
 126                     'ext': ext,
 127                 })
 128
 129         thumbnails = []
 130         for k, v in item.items():
 131             if k.startswith('photo_') and v:
 132                 width = k.replace('photo_', '')
 133                 thumbnails.append({
 134                     'id': width,
 135                     'url': v,
 136                     'width': int_or_none(width),
 137                 })
 138
 139         return {
 140             'id': video_id,
 141             'title': title,
 142             'formats': formats,
 143             'comment_count': int_or_none(item.get('comments')),
 144             'description': description,
 145             'duration': duration,
 146             'thumbnails': thumbnails,
 147             'timestamp': timestamp,
 148             'view_count': views,
 149             'age_limit': 18,
 150         }