yt_dlp/extractor/fourtube.py

   1 import base64
   2 import re
   3 import urllib.parse
   4
   5 from .common import InfoExtractor
   6 from ..utils import (
   7     int_or_none,
   8     parse_duration,
   9     parse_iso8601,
  10     str_or_none,
  11     str_to_int,
  12     try_get,
  13     unified_timestamp,
  14     url_or_none,
  15 )
  16
  17
  18 class FourTubeBaseIE(InfoExtractor):
  19     def _extract_formats(self, url, video_id, media_id, sources):
  20         token_url = 'https://{}/{}/desktop/{}'.format(
  21             self._TKN_HOST, media_id, '+'.join(sources))
  22
  23         parsed_url = urllib.parse.urlparse(url)
  24         tokens = self._download_json(token_url, video_id, data=b'', headers={
  25             'Origin': f'{parsed_url.scheme}://{parsed_url.hostname}',
  26             'Referer': url,
  27         })
  28         return [{
  29             'url': tokens[res]['token'],
  30             'format_id': res + 'p',
  31             'resolution': res + 'p',
  32             'quality': int(res),
  33         } for res in sources]
  34
  35     def _real_extract(self, url):
  36         mobj = self._match_valid_url(url)
  37         kind, video_id, display_id = mobj.group('kind', 'id', 'display_id')
  38
  39         if kind == 'm' or not display_id:
  40             url = self._URL_TEMPLATE % video_id
  41
  42         webpage = self._download_webpage(url, video_id)
  43
  44         title = self._html_search_meta('name', webpage)
  45         timestamp = parse_iso8601(self._html_search_meta(
  46             'uploadDate', webpage))
  47         thumbnail = self._html_search_meta('thumbnailUrl', webpage)
  48         uploader_id = self._html_search_regex(
  49             r'<a class="item-to-subscribe" href="[^"]+/(?:channel|user)s?/([^/"]+)" title="Go to [^"]+ page">',
  50             webpage, 'uploader id', fatal=False)
  51         uploader = self._html_search_regex(
  52             r'<a class="item-to-subscribe" href="[^"]+/(?:channel|user)s?/[^/"]+" title="Go to ([^"]+) page">',
  53             webpage, 'uploader', fatal=False)
  54
  55         categories_html = self._search_regex(
  56             r'(?s)><i class="icon icon-tag"></i>\s*Categories / Tags\s*.*?<ul class="[^"]*?list[^"]*?">(.*?)</ul>',
  57             webpage, 'categories', fatal=False)
  58         categories = None
  59         if categories_html:
  60             categories = [
  61                 c.strip() for c in re.findall(
  62                     r'(?s)<li><a.*?>(.*?)</a>', categories_html)]
  63
  64         view_count = str_to_int(self._search_regex(
  65             r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserPlays:([0-9,]+)">',
  66             webpage, 'view count', default=None))
  67         like_count = str_to_int(self._search_regex(
  68             r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserLikes:([0-9,]+)">',
  69             webpage, 'like count', default=None))
  70         duration = parse_duration(self._html_search_meta('duration', webpage))
  71
  72         media_id = self._search_regex(
  73             r'<button[^>]+data-id=(["\'])(?P<id>\d+)\1[^>]+data-quality=', webpage,
  74             'media id', default=None, group='id')
  75         sources = [
  76             quality
  77             for _, quality in re.findall(r'<button[^>]+data-quality=(["\'])(.+?)\1', webpage)]
  78         if not (media_id and sources):
  79             player_js = self._download_webpage(
  80                 self._search_regex(
  81                     r'<script[^>]id=(["\'])playerembed\1[^>]+src=(["\'])(?P<url>.+?)\2',
  82                     webpage, 'player JS', group='url'),
  83                 video_id, 'Downloading player JS')
  84             params_js = self._search_regex(
  85                 r'\$\.ajax\(url,\ opts\);\s*\}\s*\}\)\(([0-9,\[\] ]+)\)',
  86                 player_js, 'initialization parameters')
  87             params = self._parse_json(f'[{params_js}]', video_id)
  88             media_id = params[0]
  89             sources = [f'{p}' for p in params[2]]
  90
  91         formats = self._extract_formats(url, video_id, media_id, sources)
  92
  93         return {
  94             'id': video_id,
  95             'title': title,
  96             'formats': formats,
  97             'categories': categories,
  98             'thumbnail': thumbnail,
  99             'uploader': uploader,
 100             'uploader_id': uploader_id,
 101             'timestamp': timestamp,
 102             'like_count': like_count,
 103             'view_count': view_count,
 104             'duration': duration,
 105             'age_limit': 18,
 106         }
 107
 108
 109 class FourTubeIE(FourTubeBaseIE):
 110     IE_NAME = '4tube'
 111     _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?4tube\.com/(?:videos|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
 112     _URL_TEMPLATE = 'https://www.4tube.com/videos/%s/video'
 113     _TKN_HOST = 'token.4tube.com'
 114     _TESTS = [{
 115         'url': 'http://www.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black',
 116         'md5': '6516c8ac63b03de06bc8eac14362db4f',
 117         'info_dict': {
 118             'id': '209733',
 119             'ext': 'mp4',
 120             'title': 'Hot Babe Holly Michaels gets her ass stuffed by black',
 121             'uploader': 'WCP Club',
 122             'uploader_id': 'wcp-club',
 123             'upload_date': '20131031',
 124             'timestamp': 1383263892,
 125             'duration': 583,
 126             'view_count': int,
 127             'like_count': int,
 128             'categories': list,
 129             'age_limit': 18,
 130         },
 131     }, {
 132         'url': 'http://www.4tube.com/embed/209733',
 133         'only_matching': True,
 134     }, {
 135         'url': 'http://m.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black',
 136         'only_matching': True,
 137     }]
 138
 139
 140 class FuxIE(FourTubeBaseIE):
 141     _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?fux\.com/(?:video|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
 142     _URL_TEMPLATE = 'https://www.fux.com/video/%s/video'
 143     _TKN_HOST = 'token.fux.com'
 144     _TESTS = [{
 145         'url': 'https://www.fux.com/video/195359/awesome-fucking-kitchen-ends-cum-swallow',
 146         'info_dict': {
 147             'id': '195359',
 148             'ext': 'mp4',
 149             'title': 'Awesome fucking in the kitchen ends with cum swallow',
 150             'uploader': 'alenci2342',
 151             'uploader_id': 'alenci2342',
 152             'upload_date': '20131230',
 153             'timestamp': 1388361660,
 154             'duration': 289,
 155             'view_count': int,
 156             'like_count': int,
 157             'categories': list,
 158             'age_limit': 18,
 159         },
 160         'params': {
 161             'skip_download': True,
 162         },
 163     }, {
 164         'url': 'https://www.fux.com/embed/195359',
 165         'only_matching': True,
 166     }, {
 167         'url': 'https://www.fux.com/video/195359/awesome-fucking-kitchen-ends-cum-swallow',
 168         'only_matching': True,
 169     }]
 170
 171
 172 class PornTubeIE(FourTubeBaseIE):
 173     _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?porntube\.com/(?:videos/(?P<display_id>[^/]+)_|embed/)(?P<id>\d+)'
 174     _URL_TEMPLATE = 'https://www.porntube.com/videos/video_%s'
 175     _TKN_HOST = 'tkn.porntube.com'
 176     _TESTS = [{
 177         'url': 'https://www.porntube.com/videos/teen-couple-doing-anal_7089759',
 178         'info_dict': {
 179             'id': '7089759',
 180             'ext': 'mp4',
 181             'title': 'Teen couple doing anal',
 182             'uploader': 'Alexy',
 183             'uploader_id': '91488',
 184             'upload_date': '20150606',
 185             'timestamp': 1433595647,
 186             'duration': 5052,
 187             'view_count': int,
 188             'like_count': int,
 189             'age_limit': 18,
 190         },
 191         'params': {
 192             'skip_download': True,
 193         },
 194     }, {
 195         'url': 'https://www.porntube.com/videos/squirting-teen-ballerina-ecg_1331406',
 196         'info_dict': {
 197             'id': '1331406',
 198             'ext': 'mp4',
 199             'title': 'Squirting Teen Ballerina on ECG',
 200             'uploader': 'Exploited College Girls',
 201             'uploader_id': '665',
 202             'channel': 'Exploited College Girls',
 203             'channel_id': '665',
 204             'upload_date': '20130920',
 205             'timestamp': 1379685485,
 206             'duration': 851,
 207             'view_count': int,
 208             'like_count': int,
 209             'age_limit': 18,
 210         },
 211         'params': {
 212             'skip_download': True,
 213         },
 214     }, {
 215         'url': 'https://www.porntube.com/embed/7089759',
 216         'only_matching': True,
 217     }, {
 218         'url': 'https://m.porntube.com/videos/teen-couple-doing-anal_7089759',
 219         'only_matching': True,
 220     }]
 221
 222     def _real_extract(self, url):
 223         mobj = self._match_valid_url(url)
 224         video_id, display_id = mobj.group('id', 'display_id')
 225
 226         webpage = self._download_webpage(url, display_id)
 227
 228         video = self._parse_json(
 229             self._search_regex(
 230                 r'INITIALSTATE\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
 231                 webpage, 'data', group='value'), video_id,
 232             transform_source=lambda x: urllib.parse.unquote(
 233                 base64.b64decode(x).decode('utf-8')))['page']['video']
 234
 235         title = video['title']
 236         media_id = video['mediaId']
 237         sources = [str(e['height'])
 238                    for e in video['encodings'] if e.get('height')]
 239         formats = self._extract_formats(url, video_id, media_id, sources)
 240
 241         thumbnail = url_or_none(video.get('masterThumb'))
 242         uploader = try_get(video, lambda x: x['user']['username'], str)
 243         uploader_id = str_or_none(try_get(
 244             video, lambda x: x['user']['id'], int))
 245         channel = try_get(video, lambda x: x['channel']['name'], str)
 246         channel_id = str_or_none(try_get(
 247             video, lambda x: x['channel']['id'], int))
 248         like_count = int_or_none(video.get('likes'))
 249         dislike_count = int_or_none(video.get('dislikes'))
 250         view_count = int_or_none(video.get('playsQty'))
 251         duration = int_or_none(video.get('durationInSeconds'))
 252         timestamp = unified_timestamp(video.get('publishedAt'))
 253
 254         return {
 255             'id': video_id,
 256             'title': title,
 257             'formats': formats,
 258             'thumbnail': thumbnail,
 259             'uploader': uploader or channel,
 260             'uploader_id': uploader_id or channel_id,
 261             'channel': channel,
 262             'channel_id': channel_id,
 263             'timestamp': timestamp,
 264             'like_count': like_count,
 265             'dislike_count': dislike_count,
 266             'view_count': view_count,
 267             'duration': duration,
 268             'age_limit': 18,
 269         }
 270
 271
 272 class PornerBrosIE(FourTubeBaseIE):
 273     _VALID_URL = r'https?://(?:(?P<kind>www|m)\.)?pornerbros\.com/(?:videos/(?P<display_id>[^/]+)_|embed/)(?P<id>\d+)'
 274     _URL_TEMPLATE = 'https://www.pornerbros.com/videos/video_%s'
 275     _TKN_HOST = 'token.pornerbros.com'
 276     _TESTS = [{
 277         'url': 'https://www.pornerbros.com/videos/skinny-brunette-takes-big-cock-down-her-anal-hole_181369',
 278         'md5': '6516c8ac63b03de06bc8eac14362db4f',
 279         'info_dict': {
 280             'id': '181369',
 281             'ext': 'mp4',
 282             'title': 'Skinny brunette takes big cock down her anal hole',
 283             'uploader': 'PornerBros HD',
 284             'uploader_id': 'pornerbros-hd',
 285             'upload_date': '20130130',
 286             'timestamp': 1359527401,
 287             'duration': 1224,
 288             'view_count': int,
 289             'categories': list,
 290             'age_limit': 18,
 291         },
 292         'params': {
 293             'skip_download': True,
 294         },
 295     }, {
 296         'url': 'https://www.pornerbros.com/embed/181369',
 297         'only_matching': True,
 298     }, {
 299         'url': 'https://m.pornerbros.com/videos/skinny-brunette-takes-big-cock-down-her-anal-hole_181369',
 300         'only_matching': True,
 301     }]