yt_dlp/extractor/xvideos.py

   1 import re
   2
   3 from .common import InfoExtractor
   4 from ..compat import compat_urllib_parse_unquote
   5 from ..utils import (
   6     clean_html,
   7     determine_ext,
   8     ExtractorError,
   9     int_or_none,
  10     parse_duration,
  11 )
  12
  13
  14 class XVideosIE(InfoExtractor):
  15     _VALID_URL = r'''(?x)
  16                     https?://
  17                         (?:
  18                             (?:[^/]+\.)?xvideos2?\.com/video|
  19                             (?:www\.)?xvideos\.es/video|
  20                             (?:www|flashservice)\.xvideos\.com/embedframe/|
  21                             static-hw\.xvideos\.com/swf/xv-player\.swf\?.*?\bid_video=
  22                         )
  23                         (?P<id>[0-9]+)
  24                     '''
  25     _TESTS = [{
  26         'url': 'https://www.xvideos.com/video4588838/motorcycle_guy_cucks_influencer_steals_his_gf',
  27         'md5': '14cea69fcb84db54293b1e971466c2e1',
  28         'info_dict': {
  29             'id': '4588838',
  30             'ext': 'mp4',
  31             'title': 'Motorcycle Guy Cucks Influencer, Steals his GF',
  32             'duration': 108,
  33             'age_limit': 18,
  34             'thumbnail': r're:^https://img-hw.xvideos-cdn.com/.+\.jpg',
  35         }
  36     }, {
  37         # Broken HLS formats
  38         'url': 'https://www.xvideos.com/video65982001/what_s_her_name',
  39         'md5': 'b82d7d7ef7d65a84b1fa6965f81f95a5',
  40         'info_dict': {
  41             'id': '65982001',
  42             'ext': 'mp4',
  43             'title': 'what\'s her name?',
  44             'duration': 120,
  45             'age_limit': 18,
  46             'thumbnail': r're:^https://img-hw.xvideos-cdn.com/.+\.jpg',
  47         }
  48     }, {
  49         'url': 'https://flashservice.xvideos.com/embedframe/4588838',
  50         'only_matching': True,
  51     }, {
  52         'url': 'https://www.xvideos.com/embedframe/4588838',
  53         'only_matching': True,
  54     }, {
  55         'url': 'http://static-hw.xvideos.com/swf/xv-player.swf?id_video=4588838',
  56         'only_matching': True,
  57     }, {
  58         'url': 'http://xvideos.com/video4588838/biker_takes_his_girl',
  59         'only_matching': True
  60     }, {
  61         'url': 'https://xvideos.com/video4588838/biker_takes_his_girl',
  62         'only_matching': True
  63     }, {
  64         'url': 'https://xvideos.es/video4588838/biker_takes_his_girl',
  65         'only_matching': True
  66     }, {
  67         'url': 'https://www.xvideos.es/video4588838/biker_takes_his_girl',
  68         'only_matching': True
  69     }, {
  70         'url': 'http://xvideos.es/video4588838/biker_takes_his_girl',
  71         'only_matching': True
  72     }, {
  73         'url': 'http://www.xvideos.es/video4588838/biker_takes_his_girl',
  74         'only_matching': True
  75     }, {
  76         'url': 'http://fr.xvideos.com/video4588838/biker_takes_his_girl',
  77         'only_matching': True
  78     }, {
  79         'url': 'https://fr.xvideos.com/video4588838/biker_takes_his_girl',
  80         'only_matching': True
  81     }, {
  82         'url': 'http://it.xvideos.com/video4588838/biker_takes_his_girl',
  83         'only_matching': True
  84     }, {
  85         'url': 'https://it.xvideos.com/video4588838/biker_takes_his_girl',
  86         'only_matching': True
  87     }, {
  88         'url': 'http://de.xvideos.com/video4588838/biker_takes_his_girl',
  89         'only_matching': True
  90     }, {
  91         'url': 'https://de.xvideos.com/video4588838/biker_takes_his_girl',
  92         'only_matching': True
  93     }]
  94
  95     def _real_extract(self, url):
  96         video_id = self._match_id(url)
  97         webpage = self._download_webpage(url, video_id)
  98
  99         mobj = re.search(r'<h1 class="inlineError">(.+?)</h1>', webpage)
 100         if mobj:
 101             raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(mobj.group(1))), expected=True)
 102
 103         title = self._html_search_regex(
 104             (r'<title>(?P<title>.+?)\s+-\s+XVID',
 105              r'setVideoTitle\s*\(\s*(["\'])(?P<title>(?:(?!\1).)+)\1'),
 106             webpage, 'title', default=None,
 107             group='title') or self._og_search_title(webpage)
 108
 109         thumbnails = []
 110         for preference, thumbnail in enumerate(('', '169')):
 111             thumbnail_url = self._search_regex(
 112                 r'setThumbUrl%s\(\s*(["\'])(?P<thumbnail>(?:(?!\1).)+)\1' % thumbnail,
 113                 webpage, 'thumbnail', default=None, group='thumbnail')
 114             if thumbnail_url:
 115                 thumbnails.append({
 116                     'url': thumbnail_url,
 117                     'preference': preference,
 118                 })
 119
 120         duration = int_or_none(self._og_search_property(
 121             'duration', webpage, default=None)) or parse_duration(
 122             self._search_regex(
 123                 r'<span[^>]+class=["\']duration["\'][^>]*>.*?(\d[^<]+)',
 124                 webpage, 'duration', fatal=False))
 125
 126         formats = []
 127
 128         video_url = compat_urllib_parse_unquote(self._search_regex(
 129             r'flv_url=(.+?)&', webpage, 'video URL', default=''))
 130         if video_url:
 131             formats.append({
 132                 'url': video_url,
 133                 'format_id': 'flv',
 134             })
 135
 136         for kind, _, format_url in re.findall(
 137                 r'setVideo([^(]+)\((["\'])(http.+?)\2\)', webpage):
 138             format_id = kind.lower()
 139             if format_id == 'hls':
 140                 hls_formats = self._extract_m3u8_formats(
 141                     format_url, video_id, 'mp4',
 142                     entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
 143                 self._check_formats(hls_formats, video_id)
 144                 formats.extend(hls_formats)
 145             elif format_id in ('urllow', 'urlhigh'):
 146                 formats.append({
 147                     'url': format_url,
 148                     'format_id': '%s-%s' % (determine_ext(format_url, 'mp4'), format_id[3:]),
 149                     'quality': -2 if format_id.endswith('low') else None,
 150                 })
 151
 152         return {
 153             'id': video_id,
 154             'formats': formats,
 155             'title': title,
 156             'duration': duration,
 157             'thumbnails': thumbnails,
 158             'age_limit': 18,
 159         }
 160
 161
 162 class XVideosQuickiesIE(InfoExtractor):
 163     IE_NAME = 'xvideos:quickies'
 164     _VALID_URL = r'https?://(?P<domain>(?:[^/]+\.)?xvideos2?\.com)/amateur-channels/[^#]+#quickies/a/(?P<id>\d+)'
 165     _TESTS = [{
 166         'url': 'https://www.xvideos.com/amateur-channels/wifeluna#quickies/a/47258683',
 167         'md5': '16e322a93282667f1963915568f782c1',
 168         'info_dict': {
 169             'id': '47258683',
 170             'ext': 'mp4',
 171             'title': 'Verification video',
 172             'age_limit': 18,
 173             'duration': 16,
 174             'thumbnail': r're:^https://cdn.*-pic.xvideos-cdn.com/.+\.jpg',
 175         }
 176     }]
 177
 178     def _real_extract(self, url):
 179         domain, id_ = self._match_valid_url(url).group('domain', 'id')
 180         return self.url_result(f'https://{domain}/video{id_}/_', XVideosIE, id_)