[ie/youtube] Fix `uploader_id` extraction (#11818)
[yt-dlp.git] / yt_dlp / extractor / zenporn.py
blob084c1fc4f3a385c4d78f5574f05fc06fa68154c4
1 import base64
2 import binascii
4 from .common import InfoExtractor
5 from ..utils import ExtractorError, determine_ext, unified_strdate, url_or_none
6 from ..utils.traversal import traverse_obj
class ZenPornIE(InfoExtractor):
    """Extractor for video pages under ``zenporn.com/video/<id>``.

    The numeric id in the page URL is only used as ``display_id``; the
    returned ``id`` is the one found in the embed URL inside the page.
    """

    _VALID_URL = r'https?://(?:www\.)?zenporn\.com/video/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://zenporn.com/video/15627016/desi-bhabi-ki-chudai',
        'md5': '07bd576b5920714d74975c054ca28dee',
        'info_dict': {
            'id': '9563799',
            'display_id': '15627016',
            'ext': 'mp4',
            'title': 'md5:669eafd3bbc688aa29770553b738ada2',
            'description': '',
            'thumbnail': 'md5:2fc044a19bab450fef8f1931e7920a18',
            'upload_date': '20230925',
            'uploader': 'md5:9fae59847f1f58d1da8f2772016c12f3',
            'age_limit': 18,
        },
    }, {
        'url': 'https://zenporn.com/video/15570701',
        'md5': 'acba0d080d692664fcc8c4e5502b1a67',
        'info_dict': {
            'id': '2297875',
            'display_id': '15570701',
            'ext': 'mp4',
            'title': 'md5:47aebdf87644ec91e8b1a844bc832451',
            'description': '',
            'thumbnail': 'https://mstn.nv7s.com/contents/videos_screenshots/2297000/2297875/480x270/1.jpg',
            'upload_date': '20230921',
            'uploader': 'Lois Clarke',
            'age_limit': 18,
        },
    }, {
        'url': 'https://zenporn.com/video/8531117/amateur-students-having-a-fuck-fest-at-club/',
        'md5': '67411256aa9451449e4d29f3be525541',
        'info_dict': {
            'id': '12791908',
            'display_id': '8531117',
            'ext': 'mp4',
            'title': 'Amateur students having a fuck fest at club',
            'description': '',
            'thumbnail': 'https://tn.txxx.tube/contents/videos_screenshots/12791000/12791908/288x162/1.jpg',
            'upload_date': '20191005',
            'uploader': 'Jackopenass',
            'age_limit': 18,
        },
    }, {
        'url': 'https://zenporn.com/video/15872038/glad-you-came/',
        'md5': '296ccab437f5bac6099433768449d8e1',
        'info_dict': {
            'id': '111585',
            'display_id': '15872038',
            'ext': 'mp4',
            'title': 'Glad You Came',
            'description': '',
            'thumbnail': 'https://vpim.m3pd.com/contents/videos_screenshots/111000/111585/480x270/1.jpg',
            'upload_date': '20231024',
            'uploader': 'Martin Rudenko',
            'age_limit': 18,
        },
    }]

    def _gen_info_url(self, ext_domain, extr_id, lifetime=86400):
        """Build the metadata JSON URL for a video on the embed domain.

        This function is a reverse engineering from the website javascript.

        The path is made of three components derived from ``extr_id``: the id
        rounded down to a multiple of 1,000,000, then to a multiple of 1,000,
        then the id itself (e.g. ``9563799`` -> ``9000000/9563000/9563799``).

        :param ext_domain: host name serving the ``/api/json/video/`` endpoint
        :param extr_id:    numeric video id (int or digit string)
        :param lifetime:   value placed verbatim in the URL path
        :return:           the full ``https://`` URL of the ``.json`` document
        """
        result = '/'.join(str(int(extr_id) // i * i) for i in (1_000_000, 1_000, 1))
        return f'https://{ext_domain}/api/json/video/{lifetime}/{result}.json'

    @staticmethod
    def _decode_video_url(encoded_url):
        """Decode the obfuscated ``video_url`` value into a plain string.

        This function is a reverse engineering from the website javascript.

        :param encoded_url: obfuscated base64 text as delivered by the site
        :return:            the decoded string, or ``None`` if the input is
                            not a string or cannot be decoded
        """
        # A missing field arrives here as None; report it the same way as any
        # other undecodable input instead of failing with AttributeError.
        if not isinstance(encoded_url, str):
            return None

        # Replace lookalike characters and standardize map
        translation = str.maketrans('АВСЕМ.,~', 'ABCEM+/=')
        try:
            return base64.b64decode(encoded_url.translate(translation), validate=True).decode()
        except (binascii.Error, ValueError):
            # binascii.Error: not valid base64; ValueError also covers the
            # UnicodeDecodeError raised by .decode() on non-UTF-8 bytes.
            return None

    def _real_extract(self, url):
        """Extract the info dict for a single video page.

        :param url: page URL matching ``_VALID_URL``
        :return:    info dict with ``id``, ``display_id``, ``ext``, ``url``,
                    ``age_limit`` and whatever metadata the info JSON provides
        :raises ExtractorError: if the video URL is missing from the video
                                file JSON or cannot be decoded
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The page references a player at https://<domain>/embed/<id>/;
        # both API endpoints below live on that domain and use that id.
        ext_domain, video_id = self._search_regex(
            r'https://(?P<ext_domain>[\w.-]+\.\w{3})/embed/(?P<extr_id>\d+)/',
            webpage, 'embed info', group=('ext_domain', 'extr_id'))

        # Requested with fatal=False: the result only feeds the optional
        # metadata fields merged in at the end.
        info_json = self._download_json(
            self._gen_info_url(ext_domain, video_id), video_id, fatal=False)

        video_json = self._download_json(
            f'https://{ext_domain}/api/videofile.php', video_id, query={
                'video_id': video_id,
                'lifetime': 8640000,
            }, note='Downloading video file JSON', errnote='Failed to download video file JSON')

        # Read the field defensively: an empty list or an unexpected response
        # shape should surface as ExtractorError below, not as a raw
        # IndexError/KeyError/TypeError.
        encoded_url = traverse_obj(video_json, (0, 'video_url', {str}))
        decoded_url = self._decode_video_url(encoded_url)
        if not decoded_url:
            raise ExtractorError('Unable to decode the video url')

        return {
            'id': video_id,
            'display_id': display_id,
            'ext': traverse_obj(video_json, (0, 'format', {determine_ext})),
            # decoded_url is appended directly to the host, so it is used as
            # a path on the embed domain.
            'url': f'https://{ext_domain}{decoded_url}',
            'age_limit': 18,
            **traverse_obj(info_json, ('video', {
                'title': ('title', {str}),
                'description': ('description', {str}),
                'thumbnail': ('thumb', {url_or_none}),
                'upload_date': ('post_date', {unified_strdate}),
                'uploader': ('user', 'username', {str}),
            })),
        }