4 from .common
import InfoExtractor
11 get_elements_html_by_class
,
23 class YouPornIE(InfoExtractor
):
24 _VALID_URL
= r
'https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?/?(?:[#?]|$)'
25 _EMBED_REGEX
= [r
'<iframe[^>]+\bsrc=["\'](?P
<url
>(?
:https?
:)?
//(?
:www\
.)?youporn\
.com
/embed
/\d
+)']
27 'url
': 'http
://www
.youporn
.com
/watch
/505835/sex
-ed
-is-it
-safe
-to
-masturbate
-daily
/',
28 'md5
': '3744d24c50438cf5b6f6d59feb5055c2
',
31 'display_id
': 'sex
-ed
-is-it
-safe
-to
-masturbate
-daily
',
33 'title
': 'Sex Ed
: Is It Safe To Masturbate Daily?
',
34 'description
': 'Love
& Sex Answers
: http
://bit
.ly
/DanAndJenn
-- Is It Unhealthy To Masturbate Daily?
',
35 'thumbnail
': r're
:^https?
://.*\
.jpg$
',
37 'uploader
': 'Ask Dan And Jennifer
',
38 'upload_date
': '20101217',
39 'average_rating
': int,
45 'skip
': 'This video has been deactivated
',
48 'url
': 'http
://www
.youporn
.com
/watch
/561726/big
-tits
-awesome
-brunette
-on
-amazing
-webcam
-show
/?
from=related3
&al
=2&from_id
=561726&pos
=4',
51 'display_id
': 'big
-tits
-awesome
-brunette
-on
-amazing
-webcam
-show
',
53 'title
': 'Big Tits Awesome Brunette On amazing webcam show
',
54 'description
': 'http
://sweetlivegirls
.com Big Tits Awesome Brunette On amazing webcam show
.mp4
',
55 'thumbnail
': r're
:^https?
://.*\
.jpg$
',
56 'uploader
': 'Unknown
',
57 'upload_date
': '20110418',
58 'average_rating
': int,
65 'skip_download
': True,
69 'url
': 'https
://www
.youporn
.com
/embed
/505835/sex
-ed
-is-it
-safe
-to
-masturbate
-daily
/',
70 'only_matching
': True,
72 'url
': 'http
://www
.youporn
.com
/watch
/505835',
73 'only_matching
': True,
75 'url
': 'https
://www
.youporn
.com
/watch
/13922959/femdom
-principal
/',
76 'only_matching
': True,
78 'url
': 'https
://www
.youporn
.com
/watch
/16290308/tinderspecial
-trailer1
/',
83 'display_id
': 'tinderspecial
-trailer1
',
86 'upload_date
': '20201123',
87 'uploader
': 'Ersties
',
89 'thumbnail
': r're
:https
://.+\
.jpg
',
90 'timestamp
': 1606147564,
91 'title
': 'Tinder In Real Life
',
# Extract one video: read id/display_id from the URL, set the age_verified
# cookie, download the watch page, then gather formats and page metadata
# (title, description, thumbnail, duration, uploader, upload date, age limit,
# view/comment counts, categories, tags) and merge them with JSON-LD data.
96 def _real_extract(self, url):
97 video_id, display_id = self._match_valid_url(url).group('id', 'display_id
')
# The cookie is set for .youporn.com before the watch page is requested.
98 self._set_cookie('.youporn
.com
', 'age_verified
', '1')
99 webpage = self._download_webpage(f'https
://www
.youporn
.com
/watch
/{video_id}
', video_id)
# Capture the opening <div id="watch-container"> tag; default=None means a
# missing container does not fail inside the regex search itself.
101 watchable = self._search_regex(
102 r'''(<div\s[^>]*\bid\s*=\s*('|
")?watch-container(?(2)\2|(?!-)\b)[^>]*>)''',
103 webpage, 'watchability', default=None)
# Error text is the part of the cleaned #mainContent text that precedes the
# first two consecutive whitespace characters; empty text falls back to
# 'Video unavailable'.
# NOTE(review): presumably this raise is reached only when `watchable` is
# empty - confirm the guarding condition.
105 msg = re.split(r'\s{2}', clean_html(get_element_by_id('mainContent', webpage)) or '')[0]
106 raise ExtractorError(
107 f'{self.IE_NAME} says: {msg}' if msg else 'Video unavailable', expected=True)
# JSON found after "playervars:" in the page; its 'mediaDefinitions' entry
# is the input for format extraction (a missing key raises KeyError here).
109 player_vars = self._search_json(r'\bplayervars\s*:', webpage, 'player vars', video_id)
110 definitions = player_vars['mediaDefinitions']
# Helper: look up a 'videoUrl' (validated by url_or_none) among entries whose
# 'format' equals stream_type, download that URL as JSON (fatal=False), and
# select the returned entries of the same format that have a valid 'videoUrl'.
# NOTE(review): handling of a missing info_url is not established here - confirm.
112 def get_format_data(data, stream_type):
113 info_url = traverse_obj(data, (lambda _, v: v['format'] == stream_type, 'videoUrl', {url_or_none}, any))
117 self._download_json(info_url, video_id, f'Downloading {stream_type} info JSON', fatal=False),
118 lambda _, v: v['format'] == stream_type and url_or_none(v['videoUrl']))
121 # Try to extract only the actual master m3u8 first, avoiding the duplicate single resolution "master
" m3u8s
122 for hls_url in traverse_obj(get_format_data(definitions, 'hls'), (
123 lambda _, v: not isinstance(v['defaultQuality'], bool), 'videoUrl'), (..., 'videoUrl')):
124 formats.extend(self._extract_m3u8_formats(hls_url, video_id, 'mp4', fatal=False, m3u8_id='hls'))
# MP4 definitions: 'videoSize' becomes filesize and 'quality' becomes height.
126 for definition in get_format_data(definitions, 'mp4'):
127 f = traverse_obj(definition, {
129 'filesize': ('videoSize', {int_or_none}),
131 height = int_or_none(definition.get('quality'))
132 # Video URL's path looks like this:
133 # /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
134 # /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
135 # /videos/201703/11/109285532/1080P_4000K_109285532.mp4
136 # We will benefit from it by extracting some metadata
137 mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', definition['videoUrl'])
140 height = int(mobj.group('height'))
141 bitrate = int(mobj.group('bitrate'))
143 'format_id': f'{height}p-{bitrate}k',
# Title sources, in order: the watchVideoTitle <div>, the Open Graph title,
# then the "title" meta tag; only the last lookup is fatal.
149 title = self._html_search_regex(
150 r'(?s)<div[^>]+class=["\']watchVideoTitle
[^
>]+>(.+?
)</div
>',
151 webpage, 'title
', default=None) or self._og_search_title(
152 webpage, default=None) or self._html_search_meta(
153 'title
', webpage, fatal=True)
# Description sources, in order: the element with id="description", then the
# Open Graph description; both are optional (default=None).
155 description = self._html_search_regex(
156 r'(?s
)<div
[^
>]+\bid
=["\']description["\'][^
>]*>(.+?
)</div
>',
157 webpage, 'description
',
158 default=None) or self._og_search_description(
159 webpage, default=None)
# Thumbnail is the quoted value that follows "imageurl =" or "poster:".
160 thumbnail = self._search_regex(
161 r'(?
:imageurl\s
*=|poster\s
*:)\s
*(["\'])(?P<thumbnail>.+?)\1',
162 webpage, 'thumbnail', fatal=False, group='thumbnail')
# Duration comes from player_vars, and also from the video:duration meta tag.
# NOTE(review): presumably the meta lookup is only a fallback - confirm the guard.
163 duration = traverse_obj(player_vars, ('duration', {int_or_none}))
165 duration = int_or_none(self._html_search_meta(
166 'video:duration', webpage, 'duration', fatal=False))
168 uploader = self._html_search_regex(
169 r'(?s)<div[^>]+class=["\']submitByLink
["\'][^>]*>(.+?)</div>',
170 webpage, 'uploader', fatal=False)
# Several page layouts are tried for the upload date; the first matching
# pattern's text is passed to unified_strdate.
171 upload_date = unified_strdate(self._html_search_regex(
172 (r'UPLOADED:\s*<span>([^<]+)',
173 r'Date\s+[Aa]dded:\s*<span>([^<]+)',
174 r'''(?s)<div[^>]+class=["']videoInfo(?:Date|Time)\b[^>]*>(.+?)</div>''',
175 r'(?s
)<label
\b[^
>]*>Uploaded
[^
<]*</label
>\s
*<span
\b[^
>]*>(.+?
)</span
>'),
176 webpage, 'upload date
', fatal=False))
178 age_limit = self._rta_search(webpage)
# `views` holds the whole <div ... data-value=...> tag that precedes the
# "Views:" label; the count itself is read from its data-value attribute.
# NOTE(review): `views` can be None (default=None) - confirm that
# extract_attributes() is only called when a tag was found.
181 views = self._search_regex(
182 r'(<div
[^
>]*\bdata
-value\s
*=[^
>]+>)\s
*<label
>Views
:</label
>',
183 webpage, 'views
', default=None)
185 view_count = parse_count(extract_attributes(views).get('data
-value
'))
186 comment_count = parse_count(self._search_regex(
187 r'>All
[Cc
]omments? \
(([\d
,.]+)\
)',
188 webpage, 'comment count
', default=None))
# Helper: find a tag box with `regex` (optional, default=None) and return the
# text of every <a href=...> link inside it.
# NOTE(review): re.findall() needs a string - confirm the None case is handled.
190 def extract_tag_box(regex, title):
191 tag_box = self._search_regex(regex, webpage, title, default=None)
194 return re.findall(r'<a
[^
>]+href
=[^
>]+>([^
<]+)', tag_box)
196 categories = extract_tag_box(
197 r'(?s
)Categories
:.*?
</[^
>]+>(.+?
)</div
>', 'categories
')
198 tags = extract_tag_box(
199 r'(?s
)Tags
:.*?
</div
>\s
*<div
[^
>]+class=["\']tagBoxContent["\'][^
>]*>(.+?
)</div
>',
# JSON-LD VideoObject data (non-fatal) is merged with the scraped fields;
# its 'url' key is discarded before merging.
202 data = self._search_json_ld(webpage, video_id, expected_type='VideoObject
', fatal=False)
203 data.pop('url
', None)
205 result = merge_dicts(data, {
207 'display_id
': display_id,
209 'description
': description,
210 'thumbnail
': thumbnail,
211 'duration
': duration,
212 'uploader
': uploader,
213 'upload_date
': upload_date,
214 'view_count
': view_count,
215 'comment_count
': comment_count,
216 'categories
': categories,
218 'age_limit
': age_limit,
222 # Remove SEO spam "description"
223 description = result.get('description
')
224 if description and description.startswith(f'Watch
{result
.get("title")} online
'):
225 del result['description
']
230 class YouPornListBase(InfoExtractor):
def _get_next_url(self, url, pl_id, html):
    """Resolve the link inside the page's ``id="next"`` element against ``url``.

    The first ``<a ... href=...>`` found in that element supplies the target;
    its quoted value is captured up to the matching closing quote. When the
    element is absent the search runs on an empty string, and when no link
    matches, the search default (``None``) is what gets handed to ``urljoin``.

    ``pl_id`` is accepted for signature compatibility and is not used here.
    """
    next_element = get_element_by_id('next', html) or ''
    href = self._search_regex(
        r'''<a [^>]*?\bhref\s*=\s*("|')(?P<url>(?:(?!\1)[^>])+)\1''',
        next_element, 'next page', group='url', default=None)
    return urljoin(url, href)
238 def _get_title_from_slug(cls, title_slug):
239 return re.sub(r'[_-]', ' ', title_slug)
# Generator over the videos of a list page: yields one url_result per
# 'video-title' element that carries an href, page after page.
# Page numbering starts at page_num, or at 1 when page_num is falsy.
241 def _entries(self, url, pl_id, html=None, page_num=None):
242 start = page_num or 1
243 for page in itertools.count(start):
# Only a download failure on the starting page is fatal (fatal=page == start).
245 html = self._download_webpage(
246 url, pl_id, note=f'Downloading page {page}', fatal=page == start)
# Each element's href is resolved against the current page URL; elements
# without a usable href yield nothing.
249 for element in get_elements_html_by_class('video-title', html):
250 if video_url := traverse_obj(element, ({extract_attributes}, 'href', {lambda x: urljoin(url, x)})):
251 yield self.url_result(video_url)
# NOTE(review): presumably an explicitly requested page ends the loop after
# one pass, and a missing or self-referencing "next" link ends pagination -
# confirm both branch bodies.
253 if page_num is not None:
255 next_url = self._get_next_url(url, pl_id, html)
256 if not next_url or next_url == url:
# Build a playlist result whose id and title are derived from the URL's
# 'id', 'type' and 'sort' groups plus its query string. `html`, when given,
# is forwarded to _entries together with the optional 'page' query value.
261 def _real_extract(self, url, html=None):
262 m_dict = self._match_valid_url(url).groupdict()
263 pl_id, page_type, sort = (m_dict.get(k) for k in ('id', 'type', 'sort'))
# Keep only the last value of every non-empty query parameter.
264 qs = {k: v[-1] for k, v in parse_qs(url).items() if v}
# URLs without an 'id' group use the 'YouPorn' slug as their base id.
266 base_id = pl_id or 'YouPorn'
267 title = self._get_title_from_slug(base_id)
# NOTE(review): page_type and sort may be None (m_dict.get) - confirm that the
# two title lines using them are guarded.
269 title = f'{page_type.capitalize()} {title}'
270 base_id = [base_id.lower()]
274 title = f'{title} videos by {re.sub(r"[_-]", " ", sort)}'
# Query filters are rendered as sorted "key=value" strings and appended to
# both the title (comma-separated) and the id path (slash-separated).
277 filters = list(map('='.join, sorted(qs.items())))
278 title += f' ({",".join(filters)})'
279 base_id.extend(filters)
280 pl_id = '/'.join(base_id)
282 return self.playlist_result(
283 self._entries(url, pl_id, html=html, page_num=int_or_none(qs.get('page'))),
284 playlist_id=pl_id, playlist_title=title)
287 class YouPornCategoryIE(YouPornListBase):
288 IE_DESC = 'YouPorn category, with sorting, filtering and pagination'
289 _VALID_URL = r'''(?x
)
290 https?
://(?
:www\
.)?youporn\
.com
/
291 (?P
<type>category
)/(?P
<id>[^
/?
#&]+)
292 (?
:/(?P
<sort
>popular|views|rating|time|duration
))?
/?
(?
:[#?]|$)
295 'note': 'Full list with pagination',
296 'url': 'https://www.youporn.com/category/popular-with-women/popular/',
298 'id': 'popular-with-women/popular',
299 'title': 'Category popular with women videos by popular',
301 'playlist_mincount': 39,
303 'note': 'Filtered paginated list with single page result',
304 'url': 'https://www.youporn.com/category/popular-with-women/duration/?min_minutes=10',
306 'id': 'popular-with-women/duration/min_minutes=10',
307 'title': 'Category popular with women videos by duration (min_minutes=10)',
309 'playlist_mincount': 2,
310 # 'playlist_maxcount': 30,
312 'note': 'Single page of full list',
313 'url': 'https://www.youporn.com/category/popular-with-women/popular?page=1',
315 'id': 'popular-with-women/popular/page=1',
316 'title': 'Category popular with women videos by popular (page=1)',
318 'playlist_count': 36,
322 class YouPornChannelIE(YouPornListBase):
323 IE_DESC = 'YouPorn channel, with sorting and pagination'
324 _VALID_URL = r'''(?x
)
325 https?
://(?
:www\
.)?youporn\
.com
/
326 (?P
<type>channel
)/(?P
<id>[^
/?
#&]+)
327 (?
:/(?P
<sort
>rating|views|duration
))?
/?
(?
:[#?]|$)
330 'note': 'Full list with pagination',
331 'url': 'https://www.youporn.com/channel/x-feeds/',
334 'title': 'Channel X-Feeds videos',
336 'playlist_mincount': 37,
338 'note': 'Single page of full list (no filters here)',
339 'url': 'https://www.youporn.com/channel/x-feeds/duration?page=1',
341 'id': 'x-feeds/duration/page=1',
342 'title': 'Channel X-Feeds videos by duration (page=1)',
344 'playlist_count': 24,
348 def _get_title_from_slug(title_slug):
349 return re.sub(r'_', ' ', title_slug).title()
352 class YouPornCollectionIE(YouPornListBase):
353 IE_DESC = 'YouPorn collection (user playlist), with sorting and pagination'
354 _VALID_URL = r'''(?x
)
355 https?
://(?
:www\
.)?youporn\
.com
/
356 (?P
<type>collection
)s
/videos
/(?P
<id>\d
+)
357 (?
:/(?P
<sort
>rating|views|time|duration
))?
/?
(?
:[#?]|$)
360 'note': 'Full list with pagination',
361 'url': 'https://www.youporn.com/collections/videos/33044251/',
364 'title': 'Collection Sexy Lips videos',
365 'uploader': 'ph-littlewillyb',
367 'playlist_mincount': 50,
369 'note': 'Single page of full list (no filters here)',
370 'url': 'https://www.youporn.com/collections/videos/33044251/time?page=1',
372 'id': '33044251/time/page=1',
373 'title': 'Collection Sexy Lips videos by time (page=1)',
374 'uploader': 'ph-littlewillyb',
376 'playlist_count': 20,
# Download the collection page once, let the parent class build the playlist
# from that HTML, then read the collection's title and uploader from the text
# of the 'collection-infos' element.
379 def _real_extract(self, url):
380 pl_id = self._match_id(url)
381 html = self._download_webpage(url, pl_id)
382 playlist = super()._real_extract(url, html=html)
# Whitespace runs are collapsed to single spaces so the one-line pattern
# below can match; a missing element yields an empty string.
383 infos = re.sub(r'\s+', ' ', clean_html(get_element_by_class(
384 'collection-infos', html)) or '')
# Both groups default to None when the info text does not match.
385 title, uploader = self._search_regex(
386 r'^\s*Collection: (?P<title>.+?) \d+ VIDEOS \d+ VIEWS \d+ days LAST UPDATED From: (?P<uploader>[\w_-]+)',
387 infos, 'title/uploader', group=('title', 'uploader'), default=(None, None))
# The first path segment of the playlist id (the numeric collection id) is
# swapped for the human-readable title inside the playlist title.
# NOTE(review): str.replace() rejects None - confirm this runs only when
# `title` was found.
390 'title': playlist['title'].replace(playlist['id'].split('/')[0], title),
391 'uploader': uploader,
397 class YouPornTagIE(YouPornListBase):
398 IE_DESC = 'YouPorn tag (porntags), with sorting, filtering and pagination'
399 _VALID_URL = r'''(?x
)
400 https?
://(?
:www\
.)?youporn\
.com
/
401 porn(?P
<type>tag
)s
/(?P
<id>[^
/?
#&]+)
402 (?
:/(?P
<sort
>views|rating|time|duration
))?
/?
(?
:[#?]|$)
405 'note': 'Full list with pagination',
406 'url': 'https://www.youporn.com/porntags/austrian',
409 'title': 'Tag austrian videos',
411 'playlist_mincount': 33,
412 'expected_warnings': ['YouPorn tag pages are not correctly cached'],
414 'note': 'Filtered paginated list with single page result',
415 'url': 'https://www.youporn.com/porntags/austrian/duration/?min_minutes=10',
417 'id': 'austrian/duration/min_minutes=10',
418 'title': 'Tag austrian videos by duration (min_minutes=10)',
420 'playlist_mincount': 10,
421 # number of videos per page is (row x col) 2x3 + 6x4 + 2, or + 3,
422 # or more, varying with number of ads; let's set max as 9x4
423 # NB col 1 may not be shown in non-JS page with site CSS and zoom 100%
424 # 'playlist_maxcount': 32,
425 'expected_warnings': ['YouPorn tag pages are not correctly cached'],
427 'note': 'Single page of full list',
428 'url': 'https://www.youporn.com/porntags/austrian/?page=1',
430 'id': 'austrian/page=1',
431 'title': 'Tag austrian videos (page=1)',
433 'playlist_mincount': 32,
434 # 'playlist_maxcount': 34,
435 'expected_warnings': ['YouPorn tag pages are not correctly cached'],
# Tag pages are extracted by the parent class unchanged. Before delegating,
# the message below about unreliable tag-page caching is emitted with
# only_once=True (presumably as a warning, matching the 'expected_warnings'
# entries in this class's tests - TODO confirm the call it belongs to).
438 def _real_extract(self, url):
440 'YouPorn tag pages are not correctly cached and '
441 'often return incorrect results', only_once=True)
442 return super()._real_extract(url)
445 class YouPornStarIE(YouPornListBase):
446 IE_DESC = 'YouPorn Pornstar, with description, sorting and pagination'
447 _VALID_URL = r'''(?x
)
448 https?
://(?
:www\
.)?youporn\
.com
/
449 (?P
<type>pornstar
)/(?P
<id>[^
/?
#&]+)
450 (?
:/(?P
<sort
>rating|views|duration
))?
/?
(?
:[#?]|$)
453 'note': 'Full list with pagination',
454 'url': 'https://www.youporn.com/pornstar/daynia/',
457 'title': 'Pornstar Daynia videos',
458 'description': r're:Daynia Rank \d+ Videos \d+ Views [\d,.]+ .+ Subscribers \d+',
460 'playlist_mincount': 40,
462 'note': 'Single page of full list (no filters here)',
463 'url': 'https://www.youporn.com/pornstar/daynia/?page=1',
465 'id': 'daynia/page=1',
466 'title': 'Pornstar Daynia videos (page=1)',
467 'description': 're:.{180,}',
469 'playlist_count': 26,
473 def _get_title_from_slug(title_slug):
474 return re.sub(r'_', ' ', title_slug).title()
# Download the pornstar page once, let the parent class build the playlist
# from that HTML, then derive a 'description' from the text inside the
# "pornstar-info-wrapper" <div>.
# Text clean-up, as written below: newlines are first replaced by an 'nl=nl'
# marker, the HTML is cleaned, whitespace runs collapse to one space, runs of
# the marker become a single space, and the fragment 'ribe Subsc' is removed.
# An info text that is empty after strip() gives a description of None.
476 def _real_extract(self, url):
477 pl_id = self._match_id(url)
478 html = self._download_webpage(url, pl_id)
479 playlist = super()._real_extract(url, html=html)
# Verbose-mode pattern: the 'info' group spans from the opening wrapper <div>
# up to a run of at least six closing </div> tags.
480 INFO_ELEMENT_RE = r'''(?x
)
481 <div
[^
>]*\bclass\s
*=\s
*('|")(?:[\w$-]+\s+|\s)*?pornstar-info-wrapper(?:\s+[\w$-]+|\s)*\1[^>]*>
482 (?P<info>[\s\S]+?)(?:</div>\s*){6,}
485 if infos := self._search_regex(INFO_ELEMENT_RE, html, 'infos
', group='info
', default=''):
487 r'(?
:\s
*nl
=nl
)+\s
*', ' ',
488 re.sub(r'(?u
)\s
+', ' ', clean_html(re.sub('\n', 'nl
=nl
', infos)))).replace('ribe Subsc
', '')
492 'description
': infos.strip() or None,
496 class YouPornVideosIE(YouPornListBase):
497 IE_DESC = 'YouPorn
video (browse
) playlists
, with sorting
, filtering
and pagination
'
498 _VALID_URL = r'''(?x)
499 https?://(?:www\.)?youporn\.com/
502 (?:duration|rating|time|views)|
503 (?:most_(?:favou?rit|view)ed|recommended|top_rated)?))
507 'note
': 'Full
list with
pagination (too
long for test
)',
508 'url
': 'https
://www
.youporn
.com
/',
511 'title
': 'YouPorn videos
',
513 'only_matching
': True,
515 'note
': 'Full
list with
pagination (too
long for test
)',
516 'url
': 'https
://www
.youporn
.com
/recommended
',
518 'id': 'youporn
/recommended
',
519 'title
': 'YouPorn videos by recommended
',
521 'only_matching
': True,
523 'note
': 'Full
list with
pagination (too
long for test
)',
524 'url
': 'https
://www
.youporn
.com
/top_rated
',
526 'id': 'youporn
/top_rated
',
527 'title
': 'YouPorn videos by top rated
',
529 'only_matching
': True,
531 'note
': 'Full
list with
pagination (too
long for test
)',
532 'url
': 'https
://www
.youporn
.com
/browse
/time
',
535 'title
': 'YouPorn videos by time
',
537 'only_matching
': True,
539 'note
': 'Filtered paginated
list with single page result
',
540 'url
': 'https
://www
.youporn
.com
/most_favorited
/?res
=VR
&max_minutes
=2',
542 'id': 'youporn
/most_favorited
/max_minutes
=2/res
=VR
',
543 'title
': 'YouPorn videos by most
favorited (max_minutes
=2,res
=VR
)',
545 'playlist_mincount
': 10,
546 # 'playlist_maxcount
': 28,
548 'note
': 'Filtered paginated
list with several pages
',
549 'url
': 'https
://www
.youporn
.com
/most_favorited
/?res
=VR
&max_minutes
=5',
551 'id': 'youporn
/most_favorited
/max_minutes
=5/res
=VR
',
552 'title
': 'YouPorn videos by most
favorited (max_minutes
=5,res
=VR
)',
554 'playlist_mincount
': 45,
556 'note
': 'Single page of full
list',
557 'url
': 'https
://www
.youporn
.com
/browse
/time?page
=1',
559 'id': 'browse
/time
/page
=1',
560 'title
': 'YouPorn videos by
time (page
=1)',
562 'playlist_count
': 36,
566 def _get_title_from_slug(title_slug):
567 return 'YouPorn
' if title_slug == 'browse
' else title_slug