5 from .common
import InfoExtractor
15 class ThisVidIE(InfoExtractor
):
16 _VALID_URL
= r
'https?://(?:www\.)?thisvid\.com/(?P<type>videos|embed)/(?P<id>[A-Za-z0-9-]+)'
18 'url': 'https://thisvid.com/videos/sitting-on-ball-tight-jeans/',
19 'md5': '839becb572995687e11a69dc4358a386',
23 'title': 'Sitting on ball tight jeans',
24 'description': 'md5:372353bb995883d1b65fddf507489acd',
25 'thumbnail': r
're:https?://\w+\.thisvid\.com/(?:[^/]+/)+3533241/preview\.jpg',
26 'uploader_id': '150629',
27 'uploader': 'jeanslevisjeans',
28 'display_id': 'sitting-on-ball-tight-jeans',
32 'url': 'https://thisvid.com/embed/3533241/',
33 'md5': '839becb572995687e11a69dc4358a386',
37 'title': 'Sitting on ball tight jeans',
38 'thumbnail': r
're:https?://\w+\.thisvid\.com/(?:[^/]+/)+3533241/preview\.jpg',
39 'uploader_id': '150629',
40 'uploader': 'jeanslevisjeans',
41 'display_id': 'sitting-on-ball-tight-jeans',
46 def _real_extract(self
, url
):
47 main_id
, type_
= re
.match(self
._VALID
_URL
, url
).group('id', 'type')
48 webpage
= self
._download
_webpage
(url
, main_id
)
50 title
= self
._html
_search
_regex
(
51 r
'<title\b[^>]*?>(?:Video:\s+)?(.+?)(?:\s+-\s+ThisVid(?:\.com| tube))?</title>',
55 # look for more metadata
56 video_alt_url
= url_or_none(self
._search
_regex
(
57 rf
'''video_alt_url\s*:\s+'({self._VALID_URL}/)',''',
58 webpage
, 'video_alt_url', default
=None))
59 if video_alt_url
and video_alt_url
!= url
:
60 webpage
= self
._download
_webpage
(
61 video_alt_url
, main_id
,
62 note
='Redirecting embed to main page', fatal
=False) or webpage
64 video_holder
= get_element_by_class('video-holder', webpage
) or ''
65 if '>This video is a private video' in video_holder
:
66 self
.raise_login_required(
67 (clean_html(video_holder
) or 'Private video').partition('\n')[0])
69 uploader
= self
._html
_search
_regex
(
70 r
'''(?s)<span\b[^>]*>Added by:\s*</span><a\b[^>]+\bclass\s*=\s*["']author\b[^>]+\bhref\s*=\s*["']https://thisvid\.com/members/([0-9]+/.{3,}?)\s*</a>''',
71 webpage
, 'uploader', default
='')
72 uploader
= re
.split(r
'''/["'][^>]*>\s*''', uploader
)
73 if len(uploader
) == 2:
74 # id must be non-empty, uploader could be ''
75 uploader_id
, uploader
= uploader
76 uploader
= uploader
or None
78 uploader_id
= uploader
= None
80 return self
.url_result(
81 url
, ie
='Generic', url_transparent
=True,
85 uploader_id
=uploader_id
)
88 class ThisVidPlaylistBaseIE(InfoExtractor
):
89 _PLAYLIST_URL_RE
= None
92 def _find_urls(cls
, html
):
93 for m
in re
.finditer(rf
'''<a\b[^>]+\bhref\s*=\s*["'](?P<url>{cls._PLAYLIST_URL_RE}\b)[^>]+>''', html
):
96 def _generate_playlist_entries(self
, url
, playlist_id
, html
=None):
98 for page
in itertools
.count(1):
100 html
= self
._download
_webpage
(
101 page_url
, playlist_id
, note
=f
'Downloading page {page}',
104 yield from self
._find
_urls
(html
)
106 next_page
= get_element_by_class('pagination-next', html
) or ''
109 next_page
= urljoin(url
, self
._search
_regex
(
110 r
'''<a\b[^>]+\bhref\s*=\s*("|')(?P<url>(?!#)(?:(?!\1).)+)''',
111 next_page
, 'next page link', group
='url', default
=None))
113 # in case a member page should have pagination-next with empty link, not just `else:`
114 if next_page
is None:
116 parsed_url
= urllib
.parse
.urlparse(page_url
)
117 base_path
, _
, num
= parsed_url
.path
.rpartition('/')
118 num
= int_or_none(num
)
120 base_path
, num
= parsed_url
.path
.rstrip('/'), 1
121 parsed_url
= parsed_url
._replace
(path
=f
'{base_path}/{num + 1}')
122 next_page
= urllib
.parse
.urlunparse(parsed_url
)
123 if page_url
== next_page
:
128 page_url
, html
= next_page
, None
130 def _make_playlist_result(self
, url
):
131 playlist_id
= self
._match
_id
(url
)
132 webpage
= self
._download
_webpage
(url
, playlist_id
)
135 r
'(?i)\s*\|\s*ThisVid\.com\s*$',
136 self
._og
_search
_title
(webpage
, default
=None)
137 or self
._html
_search
_regex
(r
'(?s)<title\b[^>]*>(.+?)</title', webpage
, 'title', fatal
=False) or '', maxsplit
=1)[0] or None
139 return self
.playlist_from_matches(
140 self
._generate
_playlist
_entries
(url
, playlist_id
, webpage
),
141 playlist_id
=playlist_id
, playlist_title
=title
, ie
=ThisVidIE
)
144 class ThisVidMemberIE(ThisVidPlaylistBaseIE
):
145 _VALID_URL
= r
'https?://thisvid\.com/members/(?P<id>\d+)'
147 'url': 'https://thisvid.com/members/2140501/',
150 'title': 'Rafflesia\'s Profile',
152 'playlist_mincount': 16,
154 'url': 'https://thisvid.com/members/2140501/favourite_videos/',
157 'title': 'Rafflesia\'s Favourite Videos',
159 'playlist_mincount': 15,
161 'url': 'https://thisvid.com/members/636468/public_videos/',
164 'title': 'Happymouth\'s Public Videos',
166 'playlist_mincount': 196,
# Entries on a member page are recognised by the single-video URL pattern.
_PLAYLIST_URL_RE = ThisVidIE._VALID_URL
170 def _real_extract(self
, url
):
171 return self
._make
_playlist
_result
(url
)
174 class ThisVidPlaylistIE(ThisVidPlaylistBaseIE
):
175 _VALID_URL
= r
'https?://thisvid\.com/playlist/(?P<id>\d+)/video/(?P<video_id>[A-Za-z0-9-]+)'
177 'url': 'https://thisvid.com/playlist/6615/video/big-italian-booty-28/',
180 'title': 'Underwear Stuff',
182 'playlist_mincount': 200,
184 'url': 'https://thisvid.com/playlist/6615/video/big-italian-booty-28/',
188 'title': 'Big Italian Booty 28',
189 'description': 'md5:1bccf7b13765e18fb27bf764dba7ede2',
190 'uploader_id': '367912',
191 'uploader': 'Jcmusclefun',
193 'display_id': 'big-italian-booty-28',
194 'thumbnail': r
're:https?://\w+\.thisvid\.com/(?:[^/]+/)+1072387/preview\.jpg',
# Playlist pages link to their entries with playlist-style URLs, so the
# entry pattern is this class's own URL pattern.
_PLAYLIST_URL_RE = _VALID_URL
def _generate_playlist_entries(self, url, playlist_id, html=None):
    """Yield a `/videos/<video_id>/` URL for each entry the base class finds.

    Every URL produced by the parent implementation is matched against
    `_VALID_URL`, and its `video_id` group is joined onto `url` as
    `/videos/<video_id>/`.

    `url`, `playlist_id` and `html` are forwarded unchanged to the parent
    implementation; `url` also serves as the base for the yielded URLs.
    """
    for wrapped_url in super()._generate_playlist_entries(url, playlist_id, html):
        video_id = re.match(self._VALID_URL, wrapped_url).group('video_id')
        yield urljoin(url, f'/videos/{video_id}/')
207 def _real_extract(self
, url
):
208 playlist_id
, video_id
= self
._match
_valid
_url
(url
).group('id', 'video_id')
210 if not self
._yes
_playlist
(playlist_id
, video_id
):
211 redirect_url
= urljoin(url
, f
'/videos/{video_id}/')
212 return self
.url_result(redirect_url
, ThisVidIE
)
214 result
= self
._make
_playlist
_result
(url
)
216 # Fix duplicated title (`the title - the title` => `the title`)
217 title
= result
['title']
219 if t_len
> 5 and t_len
% 2 != 0:
221 if title
[t_len
] == '-':
222 first
, second
= map(str.strip
, (title
[:t_len
], title
[t_len
+ 1:]))
223 if first
and first
== second
:
224 result
['title'] = first