yt_dlp/extractor/pornbox.py

   1
   2 from .common import InfoExtractor
   3 from ..utils import (
   4     int_or_none,
   5     parse_duration,
   6     parse_iso8601,
   7     qualities,
   8     str_or_none,
   9     traverse_obj,
  10     url_or_none,
  11 )
  12
  13
  14 class PornboxIE(InfoExtractor):
  15     _VALID_URL = r'https?://(?:www\.)?pornbox\.com/application/watch-page/(?P<id>[0-9]+)'
  16     _TESTS = [{
  17         'url': 'https://pornbox.com/application/watch-page/212108',
  18         'md5': '3ff6b6e206f263be4c5e987a3162ac6e',
  19         'info_dict': {
  20             'id': '212108',
  21             'ext': 'mp4',
  22             'title': 'md5:ececc5c6e6c9dd35d290c45fed05fd49',
  23             'uploader': 'Lily Strong',
  24             'timestamp': 1665871200,
  25             'upload_date': '20221015',
  26             'age_limit': 18,
  27             'availability': 'needs_auth',
  28             'duration': 1505,
  29             'cast': ['Lily Strong', 'John Strong'],
  30             'tags': 'count:11',
  31             'description': 'md5:589c7f33e183aa8aa939537300efb859',
  32             'thumbnail': r're:^https?://cdn-image\.gtflixtv\.com.*\.jpg.*$',
  33         },
  34     }, {
  35         'url': 'https://pornbox.com/application/watch-page/216045',
  36         'info_dict': {
  37             'id': '216045',
  38             'title': 'md5:3e48528e73a9a2b12f7a2772ed0b26a2',
  39             'description': 'md5:3e631dcaac029f15ed434e402d1b06c7',
  40             'uploader': 'VK Studio',
  41             'timestamp': 1618264800,
  42             'upload_date': '20210412',
  43             'age_limit': 18,
  44             'availability': 'premium_only',
  45             'duration': 2710,
  46             'cast': 'count:3',
  47             'tags': 'count:29',
  48             'thumbnail': r're:^https?://cdn-image\.gtflixtv\.com.*\.jpg.*$',
  49             'subtitles': 'count:6',
  50         },
  51         'params': {
  52             'skip_download': True,
  53             'ignore_no_formats_error': True,
  54         },
  55         'expected_warnings': [
  56             'You are either not logged in or do not have access to this scene',
  57             'No video formats found', 'Requested format is not available'],
  58     }]
  59
  60     def _real_extract(self, url):
  61         video_id = self._match_id(url)
  62
  63         public_data = self._download_json(f'https://pornbox.com/contents/{video_id}', video_id)
  64
  65         subtitles = {country_code: [{
  66             'url': f'https://pornbox.com/contents/{video_id}/subtitles/{country_code}',
  67             'ext': 'srt',
  68         }] for country_code in traverse_obj(public_data, ('subtitles', ..., {str}))}
  69
  70         is_free_scene = traverse_obj(
  71             public_data, ('price', 'is_available_for_free', {bool}), default=False)
  72
  73         metadata = {
  74             'id': video_id,
  75             **traverse_obj(public_data, {
  76                 'title': ('scene_name', {str.strip}),
  77                 'description': ('small_description', {str.strip}),
  78                 'uploader': 'studio',
  79                 'duration': ('runtime', {parse_duration}),
  80                 'cast': (('models', 'male_models'), ..., 'model_name'),
  81                 'thumbnail': ('player_poster', {url_or_none}),
  82                 'tags': ('niches', ..., 'niche'),
  83             }),
  84             'age_limit': 18,
  85             'timestamp': parse_iso8601(traverse_obj(
  86                 public_data, ('studios', 'release_date'), 'publish_date')),
  87             'availability': self._availability(needs_auth=True, needs_premium=not is_free_scene),
  88             'subtitles': subtitles,
  89         }
  90
  91         if not public_data.get('is_purchased') or not is_free_scene:
  92             self.raise_login_required(
  93                 'You are either not logged in or do not have access to this scene', metadata_available=True)
  94             return metadata
  95
  96         media_id = traverse_obj(public_data, (
  97             'medias', lambda _, v: v['title'] == 'Full video', 'media_id', {int}), get_all=False)
  98         if not media_id:
  99             self.raise_no_formats('Could not find stream id', video_id=video_id)
 100
 101         stream_data = self._download_json(
 102             f'https://pornbox.com/media/{media_id}/stream', video_id=video_id, note='Getting manifest urls')
 103
 104         get_quality = qualities(['web', 'vga', 'hd', '1080p', '4k', '8k'])
 105         metadata['formats'] = traverse_obj(stream_data, ('qualities', lambda _, v: v['src'], {
 106             'url': 'src',
 107             'vbr': ('bitrate', {int_or_none(scale=1000)}),
 108             'format_id': ('quality', {str_or_none}),
 109             'quality': ('quality', {get_quality}),
 110             'width': ('size', {lambda x: int(x[:-1])}),
 111         }))
 112
 113         return metadata