3 from .common
import InfoExtractor
7 get_element_by_attribute
,
9 get_element_html_by_class
,
10 get_elements_by_class
,
16 from ..utils
.traversal
import traverse_obj
19 class Rule34VideoIE(InfoExtractor
):
20 _VALID_URL
= r
'https?://(?:www\.)?rule34video\.com/videos?/(?P<id>\d+)'
23 'url': 'https://rule34video.com/video/3065157/shot-it-mmd-hmv/',
24 'md5': 'ffccac2c23799dabbd192621ae4d04f3',
28 'title': 'Shot It-(mmd hmv)',
29 'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065157/preview.jpg',
35 'timestamp': 1639872000,
36 'description': 'https://discord.gg/aBqPrHSHvv',
37 'upload_date': '20211219',
38 'uploader': 'Sweet HMV',
39 'uploader_url': 'https://rule34video.com/members/22119/',
40 'categories': ['3D', 'MMD', 'iwara'],
41 'tags': 'mincount:10',
45 'url': 'https://rule34video.com/videos/3065296/lara-in-trouble-ep-7-wildeerstudio/',
46 'md5': '6bb5169f9f6b38cd70882bf2e64f6b86',
50 'title': 'Lara in Trouble Ep. 7 [WildeerStudio]',
51 'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065296/preview.jpg',
57 'timestamp': 1640131200,
59 'creators': ['WildeerStudio'],
60 'upload_date': '20211222',
61 'uploader': 'CerZule',
62 'uploader_url': 'https://rule34video.com/members/36281/',
63 'categories': ['3D', 'Tomb Raider'],
64 'tags': 'mincount:40',
69 def _real_extract(self
, url
):
70 video_id
= self
._match
_id
(url
)
71 webpage
= self
._download
_webpage
(url
, video_id
)
75 for mobj
in re
.finditer(r
'<a[^>]+href="(?P<video_url>[^"]+download=true[^"]+)".*>(?P<ext>[^\s]+) (?P<quality>[^<]+)p</a>', webpage
):
76 url
, ext
, quality
= mobj
.groups()
83 categories
, creators
, uploader
, uploader_url
= [None] * 4
84 for col
in get_elements_by_class('col', webpage
):
85 label
= clean_html(get_element_by_class('label', col
))
86 if label
== 'Categories:':
87 categories
= list(map(clean_html
, get_elements_by_class('item', col
)))
88 elif label
== 'Artist:':
89 creators
= list(map(clean_html
, get_elements_by_class('item', col
)))
90 elif label
== 'Uploaded By:':
91 uploader
= clean_html(get_element_by_class('name', col
))
92 uploader_url
= extract_attributes(get_element_html_by_class('name', col
) or '').get('href')
95 **traverse_obj(self
._search
_json
_ld
(webpage
, video_id
, default
={}), ({
97 'view_count': 'view_count',
98 'like_count': 'like_count',
99 'duration': 'duration',
100 'timestamp': 'timestamp',
101 'description': 'description',
102 'thumbnail': ('thumbnails', 0, 'url'),
106 'title': self
._html
_extract
_title
(webpage
),
107 'thumbnail': self
._html
_search
_regex
(
108 r
'preview_url:\s+\'([^
\']+)\'', webpage, 'thumbnail
', default=None),
109 'duration
': parse_duration(self._html_search_regex(
110 r'"icon-clock"></i
>\s
+<span
>((?
:\d
+:?
)+)', webpage, 'duration
', default=None)),
111 'view_count
': int_or_none(self._html_search_regex(
112 r'"icon-eye"></i
>\s
+<span
>([ \d
]+)', webpage, 'views
', default='').replace(' ', '')),
113 'like_count
': parse_count(get_element_by_class('voters count
', webpage)),
114 'comment_count
': int_or_none(self._search_regex(
115 r'[^
(]+\
((\d
+)\
)', get_element_by_attribute('href
', '#tab_comments', webpage), 'comment count', fatal=False)),
117 'creators': creators
,
118 'uploader': uploader
,
119 'uploader_url': uploader_url
,
120 'categories': categories
,
121 'tags': list(map(unescapeHTML
, re
.findall(
122 r
'<a class="tag_item"[^>]+\bhref="https://rule34video\.com/tags/\d+/"[^>]*>(?P<tag>[^>]*)</a>', webpage
))),