[ie/dropout] Fix extraction (#12102)
[yt-dlp.git] / yt_dlp / extractor / alphaporno.py
blob7b74d5524d65ae15e7559947bb9008493ba8da95
1 from .common import InfoExtractor
2 from ..utils import (
3 int_or_none,
4 parse_duration,
5 parse_filesize,
6 parse_iso8601,
class AlphaPornoIE(InfoExtractor):
    """Extractor for single video pages under alphaporno.com/videos/."""

    # The path segment following /videos/ is captured as ``id``;
    # _real_extract treats it as the display id (URL slug).
    _VALID_URL = r'https?://(?:www\.)?alphaporno\.com/videos/(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://www.alphaporno.com/videos/sensual-striptease-porn-with-samantha-alexandra/',
        'md5': 'feb6d3bba8848cd54467a87ad34bd38e',
        'info_dict': {
            'id': '258807',
            'display_id': 'sensual-striptease-porn-with-samantha-alexandra',
            'ext': 'mp4',
            'title': 'Sensual striptease porn with Samantha Alexandra',
            'thumbnail': r're:https?://.*\.jpg$',
            'timestamp': 1418694611,
            'upload_date': '20141216',
            'duration': 387,
            'filesize_approx': 54120000,
            'tbr': 1145,
            'categories': list,
            'age_limit': 18,
        },
    }

    def _real_extract(self, url):
        """Download the page at *url* and return the video's info dict.

        The media URL, numeric video id and related fields are scraped from
        the page source with regular expressions and ``<meta>`` lookups.

        Returns a dict with the keys ``id``, ``display_id``, ``url``,
        ``ext``, ``title``, ``thumbnail``, ``timestamp``, ``duration``,
        ``filesize_approx``, ``tbr``, ``categories`` and ``age_limit``.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        # The lookup is given default=None, so video_id may be None here.
        video_id = self._search_regex(
            r"video_id\s*:\s*'([^']+)'", webpage, 'video id', default=None)

        video_url = self._search_regex(
            r"video_url\s*:\s*'([^']+)'", webpage, 'video url')

        # The default ('.mp4') shows the value is expected to be a
        # dot-prefixed extension. Strip leading dots instead of slicing off
        # the first character, so a bare 'mp4' is not truncated to 'p4'.
        ext = self._html_search_meta(
            'encodingFormat', webpage, 'ext', default='.mp4').lstrip('.')

        title = self._search_regex(
            [r'<meta content="([^"]+)" itemprop="description">',
             r'class="title" itemprop="name">([^<]+)<'],
            webpage, 'title')
        thumbnail = self._html_search_meta('thumbnail', webpage, 'thumbnail')
        timestamp = parse_iso8601(self._html_search_meta(
            'uploadDate', webpage, 'upload date'))
        duration = parse_duration(self._html_search_meta(
            'duration', webpage, 'duration'))
        filesize_approx = parse_filesize(self._html_search_meta(
            'contentSize', webpage, 'file size'))
        bitrate = int_or_none(self._html_search_meta(
            'bitrate', webpage, 'bitrate'))

        # ''.split(',') evaluates to [''], so a missing keywords tag would
        # otherwise produce a single empty category. Strip each entry and
        # drop the empty ones.
        keywords = self._html_search_meta(
            'keywords', webpage, 'categories', default='')
        categories = [
            category.strip()
            for category in keywords.split(',')
            if category.strip()
        ]

        age_limit = self._rta_search(webpage)

        return {
            # Fall back to the URL slug so the result never carries id=None.
            'id': video_id or display_id,
            'display_id': display_id,
            'url': video_url,
            'ext': ext,
            'title': title,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'duration': duration,
            'filesize_approx': filesize_approx,
            'tbr': bitrate,
            'categories': categories,
            'age_limit': age_limit,
        }