[ie/facebook] Support more groups URLs (#11576)
[yt-dlp3.git] / yt_dlp / extractor / mojevideo.py
blob145e30697080d09196f2db4be851f28b410b2312
1 from .common import InfoExtractor
2 from ..utils import js_to_json, remove_end, update_url_query
class MojevideoIE(InfoExtractor):
    """Extractor for video pages on mojevideo.sk."""

    IE_DESC = 'mojevideo.sk'
    # The URL carries two named groups: the short video id and the page slug
    # (the slug may contain parentheses, hence the explicit character class).
    _VALID_URL = r'https?://(?:www\.)?mojevideo\.sk/video/(?P<id>\w+)/(?P<display_id>[\w()]+?)\.html'

    _TESTS = [{
        'url': 'https://www.mojevideo.sk/video/3d17c/chlapci_dobetonovali_sme_mame_hotovo.html',
        'md5': '384a4628bd2bbd261c5206cf77c38c17',
        'info_dict': {
            'id': '3d17c',
            'ext': 'mp4',
            'title': 'Chlapci dobetónovali sme, máme hotovo!',
            'display_id': 'chlapci_dobetonovali_sme_mame_hotovo',
            'description': 'md5:a0822126044050d304a9ef58c92ddb34',
            'thumbnail': 'https://fs5.mojevideo.sk/imgfb/250236.jpg',
            'duration': 21.0,
            'upload_date': '20230919',
            'timestamp': 1695129706,
            'like_count': int,
            'dislike_count': int,
            'view_count': int,
            'comment_count': int,
        },
    }, {
        # 720p
        'url': 'https://www.mojevideo.sk/video/14677/den_blbec.html',
        'md5': '517c3e111c53a67d10b429c1f344ba2f',
        'info_dict': {
            'id': '14677',
            'ext': 'mp4',
            'title': 'Deň blbec?',
            'display_id': 'den_blbec',
            'description': 'I maličkosť vám môže zmeniť celý deň. Nikdy nezahadzujte žuvačky na zem!',
            'thumbnail': 'https://fs5.mojevideo.sk/imgfb/83575.jpg',
            'duration': 100.0,
            'upload_date': '20120515',
            'timestamp': 1337076481,
            'like_count': int,
            'dislike_count': int,
            'view_count': int,
            'comment_count': int,
        },
    }, {
        # 1080p
        'url': 'https://www.mojevideo.sk/video/2feb2/band_maid_onset_(instrumental)_live_zepp_tokyo_(full_hd).html',
        'md5': '64599a23d3ac31cf2fe069e4353d8162',
        'info_dict': {
            'id': '2feb2',
            'ext': 'mp4',
            'title': 'BAND-MAID - onset (Instrumental) Live - Zepp Tokyo (Full HD)',
            'display_id': 'band_maid_onset_(instrumental)_live_zepp_tokyo_(full_hd)',
            'description': 'Výborná inštrumentálna skladba od skupiny BAND-MAID.',
            'thumbnail': 'https://fs5.mojevideo.sk/imgfb/196274.jpg',
            'duration': 240.0,
            'upload_date': '20190708',
            'timestamp': 1562576592,
            'like_count': int,
            'dislike_count': int,
            'view_count': int,
            'comment_count': int,
        },
    }, {
        # 720p
        'url': 'https://www.mojevideo.sk/video/358c8/dva_nissany_skyline_strielaju_v_londyne.html',
        'only_matching': True,
    }, {
        # 720p
        'url': 'https://www.mojevideo.sk/video/2455d/gopro_hero4_session_nova_sportova_vodotesna_kamera.html',
        'only_matching': True,
    }, {
        # 1080p
        'url': 'https://www.mojevideo.sk/video/352ee/amd_rx_6800_xt_vs_nvidia_rtx_3080_(test_v_9_hrach).html',
        'only_matching': True,
    }, {
        # 1080p
        'url': 'https://www.mojevideo.sk/video/2cbeb/trailer_z_avengers_infinity_war.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for a single mojevideo.sk video page.

        The page is downloaded once; the numeric video id (``vId``), the link
        expiry (``vEx``) and the per-quality hashes (``vHash``) are read from
        it and combined into one signed MP4 URL per hash.
        """
        mobj = self._match_valid_url(url)
        video_id, display_id = mobj.group('id', 'display_id')
        webpage = self._download_webpage(url, video_id)

        # Prefer the decimal id embedded in the page; when it is not found,
        # fall back to reading the URL id as a hexadecimal number.
        numeric_id = self._search_regex(
            r'\bvId\s*=\s*(\d+)', webpage, 'video id', fatal=False)
        if not numeric_id:
            numeric_id = str(int(video_id, 16))

        expiry = self._search_regex(r'\bvEx\s*=\s*["\'](\d+)', webpage, 'video expiry')
        hashes = self._search_json(
            r'\bvHash\s*=', webpage, 'video hashes', video_id,
            contains_pattern=r'\[(?s:.+)\]', transform_source=js_to_json)

        # (file-name suffix, quality rank, human-readable note) for each slot.
        # Hashes are paired with these rows by position, so only as many
        # variants as there are hashes are emitted.
        # NOTE(review): assumes the page lists hashes in exactly this order.
        variants = (
            ('', 1, 'normálna kvalita'),
            ('_lq', 0, 'nízka kvalita'),
            ('_hd', 2, 'HD-720p'),
            ('_fhd', 3, 'FULL HD-1080p'),
            ('_2k', 4, '2K-1440p'),
        )
        formats = [{
            'format_id': f'mp4-{quality}',
            'quality': quality,
            'format_note': note,
            'url': update_url_query(
                f'https://cache01.mojevideo.sk/securevideos69/{numeric_id}{suffix}.mp4',
                {'md5': digest, 'expires': expiry}),
        } for digest, (suffix, quality, note) in zip(hashes, variants)]

        # Open Graph title first; otherwise the <title> tag minus the site suffix.
        title = self._og_search_title(webpage, default=None)
        if not title:
            title = remove_end(self._html_extract_title(webpage, 'title'), ' - Mojevideo')

        page_info = {
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
            'title': title,
            'description': self._og_search_description(webpage),
        }
        # JSON-LD fields are merged last, so they override the values above
        # whenever the page provides them.
        return {**page_info, **self._search_json_ld(webpage, video_id, default={})}