[cleanup] Misc (#8968)
[yt-dlp.git] / yt_dlp / extractor / peekvids.py
blob939c26dc7a146a1a9949bf50aa61623ed733cb3d
1 import re
3 from .common import InfoExtractor
4 from ..utils import (
5 ExtractorError,
6 get_element_by_class,
7 int_or_none,
8 merge_dicts,
9 url_or_none,
class PeekVidsBaseIE(InfoExtractor):
    """Common extraction logic shared by the PeekVids-style extractors.

    Subclasses only provide ``_VALID_URL`` (which must define the named
    groups ``domain`` and ``id``) and their ``_TESTS``.
    """

    def _real_extract(self, url):
        """Extract the info dict for a single video page.

        The watch page is downloaded and scraped for the title, the numeric
        ``data-id`` of the ``<video>`` element and the JSON-LD metadata; the
        media URLs come from the ``/v-alt/<data-id>`` JSON endpoint of the
        same domain.

        Raises ExtractorError (expected) when the page reports that the
        rate limit was exceeded.
        """
        domain, video_id = self._match_valid_url(url).group('domain', 'id')
        # HTTP 429 is tolerated here so that the rate-limit page can be
        # recognised and reported with a helpful message just below
        webpage = self._download_webpage(url, video_id, expected_status=429)
        if '>Rate Limit Exceeded' in webpage:
            raise ExtractorError(
                f'You are suspected as a bot. Wait, or pass the captcha on the site and provide cookies. {self._login_hint()}',
                video_id=video_id, expected=True)

        title = self._html_search_regex(r'(?s)<h1\b[^>]*>(.+?)</h1>', webpage, 'title')

        # The ID taken from the URL becomes the display ID; the ID used from
        # here on is the one carried by the <video> element
        display_id = video_id
        video_id = self._search_regex(r'(?s)<video\b[^>]+\bdata-id\s*=\s*["\']?([\w-]+)', webpage, 'short video ID')
        srcs = self._download_json(
            f'https://www.{domain}/v-alt/{video_id}', video_id,
            note='Downloading list of source files')

        formats = []
        for key, value in srcs.items():
            f_url = url_or_none(value)
            if not f_url:
                continue

            # Only keys of the form "data-src<height>" describe a format
            height = self._search_regex(r'^data-src(\d{3,})$', key, 'height', default=None)
            if not height:
                continue

            formats.append({
                'url': f_url,
                'format_id': height,
                'height': int_or_none(height),
            })

        if not formats:
            # No key carried a recognisable height: fall back to every value
            # that is a usable URL, without shadowing the `url` parameter
            formats = [{'url': f_url} for f_url in map(url_or_none, srcs.values()) if f_url]

        info = self._search_json_ld(webpage, video_id, expected_type='VideoObject', default={})
        info.pop('url', None)

        # may not have found the thumbnail if it was in a list in the ld+json
        info.setdefault('thumbnail', self._og_search_thumbnail(webpage))
        detail = (get_element_by_class('detail-video-block', webpage)
                  or get_element_by_class('detail-block', webpage) or '')
        # Take the leading text of the detail block, stopping at the JSON-LD
        # description when it is directly followed by a tag, or at the first <ul>
        ld_description = re.escape(info.get('description') or '')
        info['description'] = self._html_search_regex(
            rf'(?s)(.+?)(?:{ld_description}\s*<|<ul\b)',
            detail, 'description', default=None) or None
        # Drop the last ","- or "-"-separated segment at the end of the title
        info['title'] = re.sub(r'\s*[,-][^,-]+$', '', info.get('title') or title) or self._generic_title(url)

        def cat_tags(name, html):
            """Return the whitespace-separated words listed after "<name>:" in html."""
            text = self._html_search_regex(
                rf'(?s)<span\b[^>]*>\s*{re.escape(name)}\s*:\s*</span>(.+?)</li>',
                html, name, default='')
            return list(filter(None, re.split(r'\s+', text)))

        return merge_dicts({
            'id': video_id,
            'display_id': display_id,
            'age_limit': 18,
            'formats': formats,
            'categories': cat_tags('Categories', detail),
            'tags': cat_tags('Tags', detail),
            'uploader': self._html_search_regex(r'[Uu]ploaded\s+by\s(.+?)"', webpage, 'uploader', default=None),
        }, info)
class PeekVidsIE(PeekVidsBaseIE):
    # Accepted URL shapes on peekvids.com:
    #   * two arbitrary path segments followed by the ID
    #   * "embed?...v=<ID>" (optionally "embed/?...")
    _VALID_URL = r'''(?x)
        https?://(?:www\.)?(?P<domain>peekvids\.com)/
        (?:(?:[^/?#]+/){2}|embed/?\?(?:[^#]*&)?v=)
        (?P<id>[^/?&#]*)
    '''
    _TESTS = [{
        # Regular watch page; 'id' is the short ID read from the page,
        # 'display_id' is the ID taken from the URL
        'url': 'https://peekvids.com/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp/BSyLMbN0YCd',
        'md5': '2ff6a357a9717dc9dc9894b51307e9a2',
        'info_dict': {
            'id': '1262717',
            'display_id': 'BSyLMbN0YCd',
            'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp',
            'ext': 'mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:0a61df3620de26c0af8963b1a730cd69',
            'timestamp': 1642579329,
            'upload_date': '20220119',
            'duration': 416,
            'view_count': int,
            'age_limit': 18,
            'uploader': 'SEXYhub.com',
            'categories': list,
            'tags': list,
        },
    }]
class PlayVidsIE(PeekVidsBaseIE):
    # The ID is the first path segment, optionally preceded by "embed/" or by
    # a one- or two-character prefix segment (e.g. "es/" or "v/")
    _VALID_URL = r'https?://(?:www\.)?(?P<domain>playvids\.com)/(?:embed/|\w\w?/)?(?P<id>[^/?#]*)'
    _TESTS = [{
        # ID directly after the host
        'url': 'https://www.playvids.com/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp',
        'md5': '2f12e50213dd65f142175da633c4564c',
        'info_dict': {
            'id': '1978030',
            'display_id': 'U3pBrYhsjXM',
            'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp',
            'ext': 'mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:0a61df3620de26c0af8963b1a730cd69',
            'timestamp': 1640435839,
            'upload_date': '20211225',
            'duration': 416,
            'view_count': int,
            'age_limit': 18,
            'uploader': 'SEXYhub.com',
            'categories': list,
            'tags': list,
        },
    }, {
        # Two-letter prefix segment before the ID
        'url': 'https://www.playvids.com/es/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp',
        'only_matching': True,
    }, {
        # Embed URL
        'url': 'https://www.playvids.com/embed/U3pBrYhsjXM',
        'only_matching': True,
    }, {
        'url': 'https://www.playvids.com/bKmGLe3IwjZ/sv/brazzers-800-phone-sex-madison-ivy-always-on-the-line',
        'md5': 'e783986e596cafbf46411a174ab42ba6',
        'info_dict': {
            'id': '762385',
            'display_id': 'bKmGLe3IwjZ',
            'ext': 'mp4',
            'title': 'Brazzers - 1 800 Phone Sex: Madison Ivy Always On The Line 6',
            'description': 'md5:bdcd2db2b8ad85831a491d7c8605dcef',
            'timestamp': 1516958544,
            'upload_date': '20180126',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 480,
            'uploader': 'Brazzers',
            'age_limit': 18,
            'view_count': int,
            'categories': list,
            'tags': list,
        },
    }, {
        # Single-letter prefix segment before the ID
        'url': 'https://www.playvids.com/v/47iUho33toY',
        'md5': 'b056b5049d34b648c1e86497cf4febce',
        'info_dict': {
            'id': '700621',
            'display_id': '47iUho33toY',
            'ext': 'mp4',
            'title': 'KATEE OWEN STRIPTIASE IN SEXY RED LINGERIE',
            'timestamp': 1507052209,
            'upload_date': '20171003',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 332,
            'uploader': 'Cacerenele',
            'age_limit': 18,
            'view_count': int,
            'categories': list,
            'tags': list,
        },
    }, {
        'url': 'https://www.playvids.com/z3_7iwWCmqt/sexy-teen-filipina-striptease-beautiful-pinay-bargirl-strips-and-dances',
        'md5': 'efa09be9f031314b7b7e3bc6510cd0df',
        'info_dict': {
            'id': '1523518',
            'display_id': 'z3_7iwWCmqt',
            'ext': 'mp4',
            'title': 'SEXY TEEN FILIPINA STRIPTEASE - Beautiful Pinay Bargirl Strips and Dances',
            'timestamp': 1607470323,
            'upload_date': '20201208',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 593,
            'uploader': 'yorours',
            'age_limit': 18,
            'view_count': int,
            'categories': list,
            'tags': list,
        },
    }]