yt_dlp/extractor/pornflip.py

   1 from .common import InfoExtractor
   2 from ..utils import (
   3     int_or_none,
   4     parse_duration,
   5     parse_iso8601
   6 )
   7
   8
   9 class PornFlipIE(InfoExtractor):
  10     _VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:(embed|sv|v)/)?(?P<id>[^/]+)'
  11     _TESTS = [
  12         {
  13             'url': 'https://www.pornflip.com/dzv9Mtw1qj2/sv/brazzers-double-dare-two-couples-fucked-jenna-reid-maya-bijou',
  14             'info_dict': {
  15                 'id': 'dzv9Mtw1qj2',
  16                 'ext': 'mp4',
  17                 'title': 'Brazzers - Double Dare Two couples fucked Jenna Reid Maya Bijou',
  18                 'description': 'md5:d2b69e6cc743c5fd158e162aa7f05821',
  19                 'duration': 476,
  20                 'like_count': int,
  21                 'dislike_count': int,
  22                 'view_count': int,
  23                 'timestamp': 1617846819,
  24                 'upload_date': '20210408',
  25                 'uploader': 'Brazzers',
  26                 'age_limit': 18,
  27             },
  28             'params': {
  29                 'skip_download': True,
  30             },
  31         },
  32         {
  33             'url': 'https://www.pornflip.com/v/IrJEC40i21L',
  34             'only_matching': True,
  35         },
  36         {
  37             'url': 'https://www.pornflip.com/Z3jzbChC5-P/sexintaxi-e-sereyna-gomez-czech-naked-couple',
  38             'only_matching': True,
  39         },
  40         {
  41             'url': 'https://www.pornflip.com/embed/bLcDFxnrZnU',
  42             'only_matching': True,
  43         },
  44     ]
  45     _HOST = 'www.pornflip.com'
  46
  47     def _real_extract(self, url):
  48         video_id = self._match_id(url)
  49         webpage = self._download_webpage(
  50             'https://{}/sv/{}'.format(self._HOST, video_id), video_id, headers={'host': self._HOST})
  51         description = self._html_search_regex(r'&p\[summary\]=(.*?)\s*&p', webpage, 'description', fatal=False)
  52         duration = self._search_regex(r'"duration":\s+"([^"]+)",', webpage, 'duration', fatal=False)
  53         view_count = self._search_regex(r'"interactionCount":\s+"([^"]+)"', webpage, 'view_count', fatal=False)
  54         title = self._html_search_regex(r'id="mediaPlayerTitleLink"[^>]*>(.+)</a>', webpage, 'title', fatal=False)
  55         uploader = self._html_search_regex(r'class="title-chanel"[^>]*>[^<]*<a[^>]*>([^<]+)<', webpage, 'uploader', fatal=False)
  56         upload_date = self._search_regex(r'"uploadDate":\s+"([^"]+)",', webpage, 'upload_date', fatal=False)
  57         likes = self._html_search_regex(
  58             r'class="btn btn-up-rating[^>]*>[^<]*<i[^>]*>[^<]*</i>[^>]*<span[^>]*>[^0-9]*([0-9]+)[^<0-9]*<', webpage, 'like_count', fatal=False)
  59         dislikes = self._html_search_regex(
  60             r'class="btn btn-down-rating[^>]*>[^<]*<i[^>]*>[^<]*</i>[^>]*<span[^>]*>[^0-9]*([0-9]+)[^<0-9]*<', webpage, 'dislike_count', fatal=False)
  61         mpd_url = self._search_regex(r'"([^"]+userscontent.net/dash/[0-9]+/manifest.mpd[^"]*)"', webpage, 'mpd_url').replace('&amp;', '&')
  62         formats = self._extract_mpd_formats(mpd_url, video_id, mpd_id='dash')
  63
  64         return {
  65             'age_limit': 18,
  66             'description': description,
  67             'dislike_count': int_or_none(dislikes),
  68             'duration': parse_duration(duration),
  69             'formats': formats,
  70             'id': video_id,
  71             'like_count': int_or_none(likes),
  72             'timestamp': parse_iso8601(upload_date),
  73             'thumbnail': self._og_search_thumbnail(webpage),
  74             'title': title,
  75             'uploader': uploader,
  76             'view_count': int_or_none(view_count),
  77         }