11 from .common
import InfoExtractor
12 from ..compat
import compat_ord
28 class CDAIE(InfoExtractor
):
# Class-level configuration for the single-video extractor.
# _VALID_URL accepts both regular cda.pl/video/<id> links and ebd.cda.pl
# embed links (which carry a WIDTHxHEIGHT path segment) and captures the
# video id as the named group "id".
29 _VALID_URL
= r
'https?://(?:(?:www\.)?cda\.pl/video|ebd\.cda\.pl/[0-9]+x[0-9]+)/(?P<id>[0-9a-z]+)'
30 _NETRC_MACHINE
= 'cdapl'
# Base URLs for the public website and for the JSON API respectively.
32 _BASE_URL
= 'https://www.cda.pl'
33 _BASE_API_URL
= 'https://api.cda.pl'
35 'Accept': 'application/vnd.cda.public+json',
37 # hardcoded in the app
38 _LOGIN_REQUEST_AUTH
= 'Basic YzU3YzBlZDUtYTIzOC00MWQwLWI2NjQtNmZmMWMxY2Y2YzVlOklBTm95QlhRRVR6U09MV1hnV3MwMW0xT2VyNWJNZzV4clRNTXhpNGZJUGVGZ0lWUlo5UGVYTDhtUGZaR1U1U3Q'
# Cache section name under which bearer tokens are stored (see the
# self.cache.load / self.cache.store calls that use it).
39 _BEARER_CACHE
= 'cda-bearer'
# Extractor test cases follow: each lists a URL, the expected md5 of the
# download, and the expected metadata fields.
42 'url': 'http://www.cda.pl/video/5749950c',
43 'md5': '6f844bf51b15f31fae165365707ae970',
48 'title': 'Oto dlaczego przed zakrętem należy zwolnić.',
49 'description': 'md5:269ccd135d550da90d1662651fcb9772',
50 'thumbnail': r
're:^https?://.*\.jpg$',
51 'average_rating': float,
54 'upload_date': '20160221',
55 'timestamp': 1456078244,
58 'url': 'http://www.cda.pl/video/57413289',
59 'md5': 'a88828770a8310fc00be6c95faf7f4d5',
63 'title': 'Lądowanie na lotnisku na Maderze',
64 'description': 'md5:60d76b71186dcce4e0ba6d4bbdb13e1a',
65 'thumbnail': r
're:^https?://.*\.jpg$',
66 'uploader': 'crash404',
67 'average_rating': float,
70 'upload_date': '20160220',
71 'timestamp': 1455968218,
74 # Age-restricted with vfilm redirection
75 'url': 'https://www.cda.pl/video/8753244c4',
76 'md5': 'd8eeb83d63611289507010d3df3bb8b3',
80 'title': '[18+] Bez Filtra: Rezerwowe Psy czyli... najwulgarniejsza polska gra?',
81 'description': 'md5:ae80bac31bd6a9f077a6cce03c7c077e',
83 'uploader': 'arhn eu',
84 'thumbnail': r
're:^https?://.*\.jpg$',
87 'average_rating': float,
88 'timestamp': 1633888264,
89 'upload_date': '20211010',
92 # Age-restricted without vfilm redirection
93 'url': 'https://www.cda.pl/video/17028157b8',
94 'md5': 'c1fe5ff4582bace95d4f0ce0fbd0f992',
98 'title': 'STENDUPY MICHAŁ OGIŃSKI',
99 'description': 'md5:5851f3272bfc31f762d616040a1d609a',
101 'uploader': 'oginski',
102 'thumbnail': r
're:^https?://.*\.jpg$',
105 'average_rating': float,
106 'timestamp': 1699705901,
107 'upload_date': '20231111',
# Embed-host URL: only checked against _VALID_URL, never downloaded.
110 'url': 'http://ebd.cda.pl/0x0/5749950c',
111 'only_matching': True,
# Download `url` through self._download_webpage while supplying an age
# confirmation form: a multipart body is built from a single empty
# 'age_confirm' field, and the matching multipart content type is passed as
# the 'Content-Type' request header. Extra positional arguments are
# forwarded to _download_webpage unchanged.
# NOTE(review): part of this call's argument list is not visible in this
# view (how `data` and **kwargs are passed on) — confirm against the full file.
114 def _download_age_confirm_page(self
, url
, video_id
, *args
, **kwargs
):
115 data
, content_type
= multipart_encode({'age_confirm': ''})
116 return self
._download
_webpage
(
117 url
, video_id
, *args
,
120 'Content-Type': content_type
,
# Log in against the CDA API and put a bearer token into self._API_HEADERS.
#
# Steps visible here:
#   1. Pick a random app version, Android version (8..13, since randrange
#      excludes its upper bound) and phone model, and store a matching
#      'User-Agent' in self._API_HEADERS.
#   2. Look up a cached bearer token for `username`; when one is still valid
#      it is installed as the 'Authorization' header.
#   3. Otherwise derive the password hash, request
#      {_BASE_API_URL}/oauth/token, cache the returned token together with
#      its expiry time, and install it as the 'Authorization' header.
#
# NOTE(review): self._API_HEADERS is written in place; if it is a class-level
# dict the change is shared by every instance — confirm that is intended.
123 def _perform_login(self
, username
, password
):
124 app_version
= random
.choice((
125 '1.2.88 build 15306',
126 '1.2.174 build 18469',
128 android_version
= random
.randrange(8, 14)
129 phone_model
= random
.choice((
130 # x-kom.pl top selling Android smartphones, as of 2022-12-26
131 # https://www.x-kom.pl/g-4/c/1590-smartfony-i-telefony.html?f201-system-operacyjny=61322-android
133 'Motorola edge 20 5G',
134 'Motorola edge 30 neo 5G',
136 'OnePlus Nord 2T 5G',
137 'Samsung Galaxy A32 SM‑A325F',
138 'Samsung Galaxy M13',
139 'Samsung Galaxy S20 FE 5G',
141 'Xiaomi POCO M4 Pro',
144 'Xiaomi Redmi 9C NFC',
145 'Xiaomi Redmi Note 10 Pro',
146 'Xiaomi Redmi Note 11 Pro',
147 'Xiaomi Redmi Note 11',
148 'Xiaomi Redmi Note 11S 5G',
149 'Xiaomi Redmi Note 11S',
154 self
._API
_HEADERS
['User-Agent'] = f
'pl.cda 1.0 (version {app_version}; Android {android_version}; {phone_model})'
# A cached token is reused only if it remains valid for more than 5 seconds
# from now; a missing cache entry is treated as an empty dict.
# NOTE(review): presumably the method returns right after installing the
# cached token — that line is not visible in this view.
156 cached_bearer
= self
.cache
.load(self
._BEARER
_CACHE
, username
) or {}
157 if cached_bearer
.get('valid_until', 0) > dt
.datetime
.now().timestamp() + 5:
158 self
._API
_HEADERS
['Authorization'] = f
'Bearer {cached_bearer["token"]}'
# Password hash: HMAC-SHA256 (fixed key below) over the lowercase hex MD5 of
# the password, encoded as URL-safe base64 with the '=' padding removed.
# The per-byte join produces the same text as md5(...).hexdigest().
161 password_hash
= base64
.urlsafe_b64encode(hmac
.new(
162 b
's01m1Oer5IANoyBXQETzSOLWXgWs01m1Oer5bMg5xrTMMxRZ9Pi4fIPeFgIVRZ9PeXL8mPfXQETZGUAN5StRZ9P',
163 ''.join(f
'{bytes((bt & 255, )).hex():0>2}'
164 for bt
in hashlib
.md5(password
.encode()).digest()).encode(),
165 hashlib
.sha256
).digest()).decode().replace('=', '')
# Token request: sent with an empty body (data=b'') and authorised with the
# app's hardcoded _LOGIN_REQUEST_AUTH value instead of a user token.
167 token_res
= self
._download
_json
(
168 f
'{self._BASE_API_URL}/oauth/token', None, 'Logging in', data
=b
'',
169 headers
={**self
._API
_HEADERS
, 'Authorization': self
._LOGIN
_REQUEST
_AUTH
},
171 'grant_type': 'password',
173 'password': password_hash
,
# Persist the token with an absolute expiry timestamp (now + expires_in) so
# the validity check above can compare it against the current time.
175 self
.cache
.store(self
._BEARER
_CACHE
, username
, {
176 'token': token_res
['access_token'],
177 'valid_until': token_res
['expires_in'] + dt
.datetime
.now().timestamp(),
179 self
._API
_HEADERS
['Authorization'] = f
'Bearer {token_res["access_token"]}'
181 def _real_extract(self
, url
):
182 video_id
= self
._match
_id
(url
)
184 if 'Authorization' in self
._API
_HEADERS
:
185 return self
._api
_extract
(video_id
)
187 return self
._web
_extract
(video_id
)
# Extract a video through the JSON API.
#
# Fetches {_BASE_API_URL}/video/<video_id> with self._API_HEADERS and works
# on the response's 'video' object. One format entry is built per item of
# meta['qualities'] that has a non-empty 'file'. When the video is flagged
# 'premium', is not 'premium_free', and no format could be built, an expected
# ExtractorError is raised. The remaining fields visible below are copied
# from the metadata into the result.
189 def _api_extract(self
, video_id
):
190 meta
= self
._download
_json
(
191 f
'{self._BASE_API_URL}/video/{video_id}', video_id
, headers
=self
._API
_HEADERS
)['video']
# NOTE(review): if traverse_obj treats each positional argument as a separate
# alternative path (as yt-dlp's helper does), this yields meta['author']
# itself whenever it is present, not meta['author']['login'] — confirm the
# intended path; the comparison with 'anonim' below expects a string.
193 uploader
= traverse_obj(meta
, 'author', 'login')
# 'height' is parsed from the quality name with its last character dropped
# (presumably a trailing "p" — verify); try_call wraps the conversion.
196 'url': quality
['file'],
197 'format': quality
.get('title'),
198 'resolution': quality
.get('name'),
199 'height': try_call(lambda: int(quality
['name'][:-1])),
200 'filesize': quality
.get('length'),
201 } for quality
in meta
['qualities'] if quality
.get('file')]
203 if meta
.get('premium') and not meta
.get('premium_free') and not formats
:
204 raise ExtractorError(
205 'Video requires CDA Premium - subscription needed', expected
=True)
# The uploader login 'anonim' is reported as no uploader (None).
209 'title': meta
.get('title'),
210 'description': meta
.get('description'),
211 'uploader': None if uploader
== 'anonim' else uploader
,
212 'average_rating': float_or_none(meta
.get('rating')),
213 'thumbnail': meta
.get('thumb'),
215 'duration': meta
.get('duration'),
216 'age_limit': 18 if meta
.get('for_adults') else 0,
217 'view_count': meta
.get('views'),
# Extract a video by scraping the public website.
#
# Steps visible here:
#   1. Set the 'cda.player' cookie to 'html5' and download
#      {_BASE_URL}/video/<video_id>/vfilm.
#   2. Raise login-required for premium-only pages and geo-restricted for
#      pages carrying the "not available in your country" text.
#   3. If the page contains an age-confirmation button, re-download the final
#      URL with the age confirmation form and remember that for 'age_limit'.
#   4. Scrape uploader and rating, start the result dict, and read JSON-LD
#      metadata (empty dict when absent).
#   5. Collect formats from the default page and from every linked quality
#      page through the nested extract_format helper.
#   6. Return merge_dicts(info_dict, info).
# Several lines of this method are not visible in this view; the comments
# describe only what is shown.
220 def _web_extract(self
, video_id
):
221 self
._set
_cookie
('cda.pl', 'cda.player', 'html5')
222 webpage
, urlh
= self
._download
_webpage
_handle
(
223 f
'{self._BASE_URL}/video/{video_id}/vfilm', video_id
)
# Polish marker text: "This film is available to premium users".
225 if 'Ten film jest dostępny dla użytkowników premium' in webpage
:
226 self
.raise_login_required('This video is only available for premium users')
# Polish marker text: "unavailable in your country".
228 if re
.search(r
'niedostępn[ey] w(?: |\s+)Twoim kraju\s*<', webpage
):
229 self
.raise_geo_restricted()
# urlh.url is used (rather than the requested URL) so the confirmation goes
# to wherever the first request ended up.
231 need_confirm_age
= False
232 if self
._html
_search
_regex
(r
'(<button[^>]+name="[^"]*age_confirm[^"]*")',
233 webpage
, 'birthday validate form', default
=None):
234 webpage
= self
._download
_age
_confirm
_page
(
235 urlh
.url
, video_id
, note
='Confirming age')
236 need_confirm_age
= True
240 uploader
= self
._search
_regex
(r
'''(?x)
241 <(span|meta)[^>]+itemprop=(["\'])author\
2[^
>]*>
242 (?
:<\
1[^
>]*>[^
<]*</\
1>|
(?
!</\
1>)(?
:.|
\n))*?
243 <(span|meta
)[^
>]+itemprop
=(["\'])name\4[^>]*>(?P<uploader>[^<]+)</\3>
244 ''', webpage, 'uploader', default=None, group='uploader')
245 average_rating = self._search_regex(
246 (r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\
1[^
>]*>(?P
<rating_value
>[0-9.]+)',
247 r'<span
[^
>]+\bclass
=["\']rating["\'][^
>]*>(?P
<rating_value
>[0-9.]+)'), webpage, 'rating
', fatal=False,
248 group='rating_value
')
252 'title
': self._og_search_title(webpage),
253 'description
': self._og_search_description(webpage),
254 'uploader
': uploader,
255 'average_rating
': float_or_none(average_rating),
256 'thumbnail
': self._og_search_thumbnail(webpage),
259 'age_limit
': 18 if need_confirm_age else 0,
262 info = self._search_json_ld(webpage, video_id, default={})
264 # Source: https://www.cda.pl/js/player.js?t=1606154898
266 for p in ('_XDDD
', '_CDA
', '_ADC
', '_CXD
', '_QWE
', '_Q5
', '_IKSDE
'):
268 a = urllib.parse.unquote(a)
272 b.append(chr(33 + (f + 14) % 94) if 33 <= f <= 126 else chr(f))
274 a = a.replace('.cda
.mp4
', '')
275 for p in ('.2cda
.pl
', '.3cda
.pl
'):
276 a = a.replace(p, '.cda
.pl
')
278 a = a.replace('/upstream
', '.mp4
/upstream
')
279 return 'https
://' + a
280 return 'https
://' + a + '.mp4
'
# extract_format: read the player_data JSON embedded in `page`, normalise the
# file URL, append the resulting format to info_dict['formats'], and request
# URLs for the other listed qualities through the videoGetLink JSON-RPC call.
282 def extract_format(page, version):
283 json_str = self._html_search_regex(
284 r'player_data
=(\\?
["\'])(?P<player_data>.+?)\1', page,
285 f'{version} player_json', fatal=False, group='player_data')
288 player_data = self._parse_json(
289 json_str, f'{version} player_data', fatal=False)
292 video = player_data.get('video')
293 if not video or 'file' not in video:
294 self.report_warning(f'Unable to extract {version} version information')
# 'uggc' is 'http' under ROT13, so such values are ROT13-encoded URLs; other
# values that do not start with 'http' go through decrypt_file.
296 if video['file'].startswith('uggc'):
297 video['file'] = codecs.decode(video['file'], 'rot_13')
298 if video['file'].endswith('adc.mp4'):
299 video['file'] = video['file'].replace('adc.mp4', '.mp4')
300 elif not video['file'].startswith('http'):
301 video['file'] = decrypt_file(video['file'])
# Translate the page's quality value back to its key in `qualities`; when no
# entry matches, the raw value is kept.
# NOTE(review): if 'quality' is missing and nothing matches, video_quality is
# None and the [:-1] slice below would raise TypeError — confirm whether that
# can happen.
302 video_quality = video.get('quality')
303 qualities = video.get('qualities', {})
304 video_quality = next((k for k, v in qualities.items() if v == video_quality), video_quality)
305 info_dict['formats'].append({
306 'url': video['file'],
307 'format_id': video_quality,
308 'height': int_or_none(video_quality[:-1]),
310 for quality, cda_quality in qualities.items():
311 if quality == video_quality:
313 data = {'jsonrpc': '2.0', 'method': 'videoGetLink', 'id': 2,
314 'params': [video_id, cda_quality, video.get('ts'), video.get('hash2'), {}]}
315 data = json.dumps(data).encode()
316 video_url = self._download_json(
317 f'https://www.cda.pl/video/{video_id}', video_id, headers={
318 'Content-Type': 'application/json',
319 'X-Requested-With': 'XMLHttpRequest',
320 }, data=data, note=f'Fetching {quality} url',
321 errnote=f'Failed to fetch {quality} url', fatal=False)
322 if try_get(video_url, lambda x: x['result']['status']) == 'ok':
323 video_url = try_get(video_url, lambda x: x['result']['resp'])
324 info_dict['formats'].append({
326 'format_id': quality,
327 'height': int_or_none(quality[:-1]),
330 if not info_dict['duration']:
331 info_dict['duration'] = parse_duration(video.get('duration'))
333 extract_format(webpage, 'default')
# Each quality button links to a page for that resolution; the pages are
# fetched with the age-confirming downloader or the plain one.
335 for href, resolution in re.findall(
336 r'<a[^>]+data-quality="[^
"]+"[^
>]+href
="([^"]+)"[^>]+class="quality
-btn
"[^>]*>([0-9]+p)',
339 handler = self._download_age_confirm_page
341 handler = self._download_webpage
344 urljoin(self._BASE_URL, href), video_id,
345 f'Downloading {resolution} version information', fatal=False)
347 # Manually report warning because empty page is returned when
348 # invalid version is requested.
349 self.report_warning(f'Unable to download {resolution} version information')
352 extract_format(webpage, resolution)
354 return merge_dicts(info_dict, info)
357 class CDAFolderIE(InfoExtractor):
# Class-level configuration for the folder (playlist) extractor.
# _VALID_URL captures the owning channel name as "channel" and the numeric
# folder id as "id". The entries after it are test cases: a folder URL, the
# expected playlist title, and a minimum number of playlist entries.
359 _VALID_URL = r'https?://(?:www\.)?cda\.pl/(?P<channel>\w+)/folder/(?P<id>\d+)'
362 'url': 'https://www.cda.pl/domino264/folder/31188385',
365 'title': 'SERIA DRUGA',
367 'playlist_mincount': 13,
370 'url': 'https://www.cda.pl/smiechawaTV/folder/2664592/vfilm',
373 'title': 'VideoDowcipy - wszystkie odcinki',
375 'playlist_mincount': 71,
378 'url': 'https://www.cda.pl/DeliciousBeauty/folder/19129979/vfilm',
381 'title': 'TESTY KOSMETYKÓW',
383 'playlist_mincount': 139,
def _real_extract(self, url):
    """Return a lazily paginated playlist for a CDA folder.

    The folder id and channel name come from ``url``. The folder page itself
    is downloaded once to obtain the playlist title (its Open Graph title);
    the entries are produced on demand, one listing page at a time.
    """
    match = self._match_valid_url(url)
    folder_id, channel = match.group('id', 'channel')

    folder_page = self._download_webpage(url, folder_id)

    def extract_page_entries(page):
        # The page argument is offset by one to form the listing page number
        # in the URL. A 404 status is passed as an expected status, so the
        # body of such a response is still scanned for links.
        listing = self._download_webpage(
            f'https://www.cda.pl/{channel}/folder/{folder_id}/vfilm/{page + 1}', folder_id,
            f'Downloading page {page + 1}', expected_status=404)
        for video_id in re.findall(r'<a[^>]+href="/video/([0-9a-z]+)"', listing):
            yield self.url_result(f'https://www.cda.pl/video/{video_id}', CDAIE, video_id)

    entries = OnDemandPagedList(extract_page_entries, self._MAX_PAGE_SIZE)
    title = self._og_search_title(folder_page)
    return self.playlist_result(entries, folder_id, title)