[ie/youtube] Add age-gate workaround for some embeddable videos (#11821)
[yt-dlp.git] / yt_dlp / extractor / aitube.py
blob5179b72e9fbc7c3ce0c16acfd6621e3e1144322e
1 from .common import InfoExtractor
2 from ..utils import int_or_none, merge_dicts
class AitubeKZVideoIE(InfoExtractor):
    """Extractor for single aitube.kz videos addressed by an ``id`` query parameter.

    Handles both the watch page (``/video?...``) and the embed page (``/embed/?...``).
    """

    # ``id`` has to be a complete parameter name, i.e. sit at the very start of the
    # query string or directly after an ``&``.  A looser prefix would also accept
    # parameters that merely end in "id" (``fbclid=``, ``yclid=``, ``uuid=`` ...) and,
    # being greedy, would pick the value of the last such parameter as the video id.
    # The lazy ``*?`` makes the first ``id=`` win.
    _VALID_URL = r'https?://aitube\.kz/(?:video|embed/)\?(?:[^#&]+&)*?id=(?P<id>[\w-]+)'
    _TESTS = [{
        # id parameter as first parameter
        'url': 'https://aitube.kz/video?id=9291d29b-c038-49a1-ad42-3da2051d353c&playlistId=d55b1f5f-ef2a-4f23-b646-2a86275b86b7&season=1',
        'info_dict': {
            'id': '9291d29b-c038-49a1-ad42-3da2051d353c',
            'ext': 'mp4',
            'duration': 2174.0,
            'channel_id': '94962f73-013b-432c-8853-1bd78ca860fe',
            'like_count': int,
            'channel': 'ASTANA TV',
            'comment_count': int,
            'view_count': int,
            'description': 'Смотреть любимые сериалы и видео, поделиться видео и сериалами с друзьями и близкими',
            'thumbnail': 'https://cdn.static02.aitube.kz/kz.aitudala.aitube.staticaccess/files/ddf2a2ff-bee3-409b-b5f2-2a8202bba75b',
            'upload_date': '20221102',
            'timestamp': 1667370519,
            'title': 'Ангел хранитель 1 серия',
            'channel_follower_count': int,
        },
    }, {
        # embed url
        'url': 'https://aitube.kz/embed/?id=9291d29b-c038-49a1-ad42-3da2051d353c',
        'only_matching': True,
    }, {
        # id parameter is not the first parameter
        'url': 'https://aitube.kz/video?season=1&id=9291d29b-c038-49a1-ad42-3da2051d353c&playlistId=d55b1f5f-ef2a-4f23-b646-2a86275b86b7',
        'only_matching': True,
    }, {
        # trailing parameter whose name only ends in "id" (tracking parameter)
        'url': 'https://aitube.kz/video?id=9291d29b-c038-49a1-ad42-3da2051d353c&fbclid=abc',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the page for *url* and return the info dict of its video.

        The formats are read from an HLS manifest whose URL is built from the video
        id alone.  The Next.js payload and the JSON-LD block embedded in the page
        only contribute metadata, so a missing or restructured payload must not
        abort the extraction; both are looked up non-fatally.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Walk props -> pageProps -> videoInfo defensively: any missing or
        # non-dict level yields an empty dict instead of a KeyError/TypeError.
        video_info = self._search_nextjs_data(webpage, video_id, fatal=False)
        for key in ('props', 'pageProps', 'videoInfo'):
            video_info = video_info.get(key) if isinstance(video_info, dict) else None
        if not isinstance(video_info, dict):
            video_info = {}

        json_ld_data = self._search_json_ld(webpage, video_id, default={})

        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            f'https://api-http.aitube.kz/kz.aitudala.aitube.staticaccess/video/{video_id}/video', video_id)

        # Values from the first dict win; JSON-LD only fills fields left unset.
        return merge_dicts({
            'id': video_id,
            'title': video_info.get('title') or self._html_search_meta(['name', 'og:title'], webpage),
            'description': video_info.get('description'),
            'formats': formats,
            'subtitles': subtitles,
            'view_count': (int_or_none(video_info.get('viewCount'))
                           or int_or_none(self._html_search_meta('ya:ovs:views_total', webpage))),
            'like_count': int_or_none(video_info.get('likeCount')),
            'channel': video_info.get('channelTitle'),
            'channel_id': video_info.get('channelId'),
            'thumbnail': video_info.get('coverUrl'),
            'comment_count': int_or_none(video_info.get('commentCount')),
            'channel_follower_count': int_or_none(video_info.get('channelSubscriberCount')),
        }, json_ld_data)