3 from .common
import InfoExtractor
4 from ..networking
.exceptions
import HTTPError
class WykopBaseExtractor(InfoExtractor):
    """Shared plumbing for the Wykop extractors: bearer-token handling,
    API requests and the metadata fields common to every Wykop item."""

    def _get_token(self, force_refresh=False):
        """Return a bearer token for the Wykop API.

        A token stored in the cache under ('wykop', 'bearer') is reused
        unless it is empty or ``force_refresh`` is true. Otherwise an
        anonymous token is requested from the ``auth`` endpoint, written
        to the cache and returned.
        """
        if not force_refresh:
            maybe_cached = self.cache.load('wykop', 'bearer')
            if maybe_cached:
                return maybe_cached

        new_token = traverse_obj(
            self._do_call_api('auth', None, 'Downloading anonymous auth token', data={
                # hardcoded in frontend
                'key': 'w53947240748',
                'secret': 'd537d9e0a7adc1510842059ae5316419',
            }), ('data', 'token'))

        self.cache.store('wykop', 'bearer', new_token)
        return new_token

    def _do_call_api(self, path, video_id, note='Downloading JSON metadata', data=None, headers=None):
        """Perform a single request against ``https://wykop.pl/api/v3/<path>``.

        When ``data`` is given it is wrapped as ``{'data': data}``,
        JSON-encoded and sent as the request body together with a JSON
        ``Content-Type`` header. Returns the result of ``_download_json``.
        """
        # Always work on a private copy: the Content-Type header added below
        # must not leak into a shared default or into the caller's own dict.
        headers = dict(headers or {})
        if data:
            data = json.dumps({'data': data}).encode()
            headers['Content-Type'] = 'application/json'

        return self._download_json(
            f'https://wykop.pl/api/v3/{path}', video_id,
            note=note, data=data, headers=headers)

    def _call_api(self, path, video_id, note='Downloading JSON metadata'):
        """Call the API with a bearer token, retrying once on HTTP 403.

        A 403 on the first attempt triggers a forced token refresh and a
        second attempt; any other ExtractorError, or a 403 on the second
        attempt, is re-raised unchanged.
        """
        token = self._get_token()
        for retrying in range(2):
            try:
                return self._do_call_api(path, video_id, note, headers={'Authorization': f'Bearer {token}'})
            except ExtractorError as e:
                if not retrying and isinstance(e.cause, HTTPError) and e.cause.status == 403:
                    token = self._get_token(True)
                    continue
                raise

    def _common_data_extract(self, data):
        """Build the ``url_transparent`` fields shared by all Wykop items
        from one API ``data`` object."""
        author = traverse_obj(data, ('author', 'username'), expected_type=str)

        return {
            '_type': 'url_transparent',
            'display_id': data.get('slug'),
            'url': traverse_obj(data,
                                ('media', 'embed', 'url'),  # what gets an iframe embed
                                ('source', 'url'),  # clickable url (dig only)
                                expected_type=url_or_none),
            'thumbnail': traverse_obj(
                data, ('media', 'photo', 'url'), ('media', 'embed', 'thumbnail'), expected_type=url_or_none),
            # The test expectations in this module list the Wykop username
            # under both 'uploader' and 'uploader_id'.
            'uploader': author,
            'uploader_id': author,
            'uploader_url': format_field(author, None, 'https://wykop.pl/ludzie/%s'),
            'timestamp': parse_iso8601(data.get('created_at'), delimiter=' '),  # time it got submitted
            'like_count': traverse_obj(data, ('votes', 'up'), expected_type=int),
            'dislike_count': traverse_obj(data, ('votes', 'down'), expected_type=int),
            'comment_count': traverse_obj(data, ('comments', 'count'), expected_type=int),
            'age_limit': 18 if data.get('adult') else 0,
            'tags': data.get('tags'),
        }
75 class WykopDigIE(WykopBaseExtractor
):
77 _VALID_URL
= r
'https?://(?:www\.)?wykop\.pl/link/(?P<id>\d+)'
80 'url': 'https://wykop.pl/link/6912923/najbardziej-zrzedliwy-kot-na-swiecie-i-frozen-planet-ii-i-bbc-earth',
84 'title': 'Najbardziej zrzędliwy kot na świecie I Frozen Planet II I BBC Earth',
85 'display_id': 'najbardziej-zrzedliwy-kot-na-swiecie-i-frozen-planet-ii-i-bbc-earth',
86 'description': 'md5:ac0f87dea1cdcb6b0c53f3612a095c87',
87 'tags': ['zwierzaczki', 'koty', 'smiesznykotek', 'humor', 'rozrywka', 'ciekawostki'],
89 'timestamp': 1669154480,
90 'release_timestamp': 1669194241,
91 'release_date': '20221123',
92 'uploader': 'starnak',
93 'uploader_id': 'starnak',
94 'uploader_url': 'https://wykop.pl/ludzie/starnak',
98 'thumbnail': r
're:https?://wykop\.pl/cdn/.+',
100 'channel': 'BBC Earth',
101 'channel_id': 'UCwmZiChSryoWQCZMIQezgTg',
102 'channel_url': 'https://www.youtube.com/channel/UCwmZiChSryoWQCZMIQezgTg',
103 'categories': ['Pets & Animals'],
104 'upload_date': '20220923',
106 'channel_follower_count': int,
107 'availability': 'public',
108 'live_status': 'not_live',
109 'playable_in_embed': True,
@classmethod
def suitable(cls, url):
    """Accept dig URLs, but leave dig *comment* URLs to WykopDigCommentIE."""
    matched = cls._match_valid_url(url)
    if not matched:
        # Hand back the falsy match result itself, as the `and` chain would.
        return matched
    return not WykopDigCommentIE.suitable(url)
def _real_extract(self, url):
    """Look up a dig through ``links/<id>`` and return its info dict:
    the common Wykop fields plus title, description and release time."""
    link_id = self._match_id(url)
    link = self._call_api(f'links/{link_id}', link_id)['data']

    # NOTE(review): no 'id' key is set here - confirm the extractor that
    # resolves the embedded URL is expected to provide it.
    info = self._common_data_extract(link)
    info['title'] = link['title']
    info['description'] = link.get('description')
    # time it got "digged" to the homepage
    info['release_timestamp'] = parse_iso8601(link.get('published_at'), delimiter=' ')
    return info
131 class WykopDigCommentIE(WykopBaseExtractor
):
132 IE_NAME
= 'wykop:dig:comment'
133 _VALID_URL
= r
'https?://(?:www\.)?wykop\.pl/link/(?P<dig_id>\d+)/[^/]+/komentarz/(?P<id>\d+)'
136 'url': 'https://wykop.pl/link/6992589/strollowal-oszusta-przez-ponad-24-minuty-udawal-naiwniaka-i-nagral-rozmowe/komentarz/114540527/podobna-sytuacja-ponizej-ciekawa-dyskusja-z-oszustem-na-sam-koniec-sam-bylem-w-biurze-swiadkiem-podobnej-rozmowy-niemal-zakonczonej-sukcesem-bandyty-g',
140 'title': 'md5:e7c741c5baa7ed6478000caf72865577',
141 'display_id': 'md5:45b2d12bd0e262d09cc7cf7abc8412db',
142 'description': 'md5:bcec7983429f9c0630f9deb9d3d1ba5e',
143 'timestamp': 1674476945,
144 'uploader': 'Bartholomew',
145 'uploader_id': 'Bartholomew',
146 'uploader_url': 'https://wykop.pl/ludzie/Bartholomew',
147 'thumbnail': r
're:https?://wykop\.pl/cdn/.+',
149 'availability': 'public',
151 'upload_date': '20230117',
152 'categories': ['Entertainment'],
155 'dislike_count': int,
156 'comment_count': int,
157 'channel_follower_count': int,
158 'playable_in_embed': True,
159 'live_status': 'not_live',
161 'chapters': 'count:3',
162 'channel': 'Poszukiwacze Okazji',
163 'channel_id': 'UCzzvJDZThwv06dR4xmzrZBw',
164 'channel_url': 'https://www.youtube.com/channel/UCzzvJDZThwv06dR4xmzrZBw',
def _real_extract(self, url):
    """Look up one comment of a dig through
    ``links/<dig_id>/comments/<comment_id>`` and return its info dict,
    titled "<author username> - <comment text>"."""
    dig_id, comment_id = self._search_regex(
        self._VALID_URL, url, 'dig and comment ids', group=('dig_id', 'id'))
    comment = self._call_api(f'links/{dig_id}/comments/{comment_id}', comment_id)['data']

    # NOTE(review): no 'id' key is set here - confirm the extractor that
    # resolves the embedded URL is expected to provide it.
    info = self._common_data_extract(comment)
    username = traverse_obj(comment, ('author', 'username'))
    text = comment.get('content')
    info['title'] = f"{username} - {text or ''}"
    info['description'] = text
    return info
180 class WykopPostIE(WykopBaseExtractor
):
181 IE_NAME
= 'wykop:post'
182 _VALID_URL
= r
'https?://(?:www\.)?wykop\.pl/wpis/(?P<id>\d+)'
185 'url': 'https://wykop.pl/wpis/68893343/kot-koty-smiesznykotek',
187 'id': 'PL8JMjiUPHUhwc9ZlKa_5IFeBwBV8Xe7jI',
188 'title': 'PawelW124 - #kot #koty #smiesznykotek',
189 'description': '#kot #koty #smiesznykotek',
190 'display_id': 'kot-koty-smiesznykotek',
191 'tags': ['kot', 'koty', 'smiesznykotek'],
192 'uploader': 'PawelW124',
193 'uploader_id': 'PawelW124',
194 'uploader_url': 'https://wykop.pl/ludzie/PawelW124',
195 'timestamp': 1668938142,
198 'dislike_count': int,
199 'thumbnail': r
're:https?://wykop\.pl/cdn/.+',
200 'comment_count': int,
202 'channel_id': 'UCW9T_-uZoiI7ROARQdTDyOw',
203 'channel_url': 'https://www.youtube.com/channel/UCW9T_-uZoiI7ROARQdTDyOw',
204 'upload_date': '20221120',
205 'modified_date': '20220814',
206 'availability': 'public',
209 'playlist_mincount': 15,
211 'flat_playlist': True,
@classmethod
def suitable(cls, url):
    """Accept post URLs, but leave post *comment* URLs to WykopPostCommentIE."""
    matched = cls._match_valid_url(url)
    if not matched:
        # Hand back the falsy match result itself, as the `and` chain would.
        return matched
    return not WykopPostCommentIE.suitable(url)
def _real_extract(self, url):
    """Look up a post through ``entries/<id>`` and return its info dict,
    titled "<author username> - <post text>"."""
    entry_id = self._match_id(url)
    entry = self._call_api(f'entries/{entry_id}', entry_id)['data']

    # NOTE(review): no 'id' key is set here; the expected test id in this
    # file is not the numeric post id, so it presumably comes from the
    # resolved embed - confirm.
    info = self._common_data_extract(entry)
    username = traverse_obj(entry, ('author', 'username'))
    text = entry.get('content')
    info['title'] = f"{username} - {text or ''}"
    info['description'] = text
    return info
231 class WykopPostCommentIE(WykopBaseExtractor
):
232 IE_NAME
= 'wykop:post:comment'
233 _VALID_URL
= r
'https?://(?:www\.)?wykop\.pl/wpis/(?P<post_id>\d+)/[^/#]+#(?P<id>\d+)'
236 'url': 'https://wykop.pl/wpis/70084873/test-test-test#249303979',
238 'id': 'confusedquickarmyant',
240 'title': 'tpap - treść komentarza',
241 'display_id': 'tresc-komentarza',
242 'description': 'treść komentarza',
244 'uploader_id': 'tpap',
245 'uploader_url': 'https://wykop.pl/ludzie/tpap',
246 'timestamp': 1675349470,
247 'upload_date': '20230202',
254 'dislike_count': int,
255 'thumbnail': r
're:https?://wykop\.pl/cdn/.+',
def _real_extract(self, url):
    """Look up one comment of a post through
    ``entries/<post_id>/comments/<comment_id>`` and return its info dict,
    titled "<author username> - <comment text>"."""
    post_id, comment_id = self._search_regex(
        self._VALID_URL, url, 'post and comment ids', group=('post_id', 'id'))
    comment = self._call_api(f'entries/{post_id}/comments/{comment_id}', comment_id)['data']

    # NOTE(review): no 'id' key is set here; the expected test id in this
    # file is not the numeric comment id, so it presumably comes from the
    # resolved embed - confirm.
    info = self._common_data_extract(comment)
    username = traverse_obj(comment, ('author', 'username'))
    text = comment.get('content')
    info['title'] = f"{username} - {text or ''}"
    info['description'] = text
    return info