[ie/youtube] Fix `uploader_id` extraction (#11818)
[yt-dlp.git] / yt_dlp / extractor / mediaklikk.py
blob197e91d1d917116ac59e1558c6590b86616817d4
1 import urllib.parse
3 from .common import InfoExtractor
4 from ..utils import (
5 ExtractorError,
6 traverse_obj,
7 unified_strdate,
8 url_or_none,
class MediaKlikkIE(InfoExtractor):
    """Extractor for video/article pages on mediaklikk.hu, m4sport.hu, hirado.hu and petofilive.hu.

    The URL may optionally carry a ``/YYYY/MM/DD/`` segment in front of the
    slug; when present it is used as the upload date.
    """

    _VALID_URL = r'''(?x)https?://(?:www\.)?
                        (?:mediaklikk|m4sport|hirado|petofilive)\.hu/.*?(?:videok?|cikk)/
                        (?:(?P<year>[0-9]{4})/(?P<month>[0-9]{1,2})/(?P<day>[0-9]{1,2})/)?
                        (?P<id>[^/#?_]+)'''

    _TESTS = [{
        'url': 'https://mediaklikk.hu/filmajanlo/cikk/az-ajto/',
        'info_dict': {
            'id': '668177',
            'title': 'Az ajtó',
            'display_id': 'az-ajto',
            'ext': 'mp4',
            'thumbnail': 'https://cdn.cms.mtv.hu/wp-content/uploads/sites/4/2016/01/vlcsnap-2023-07-31-14h18m52s111.jpg',
        },
    }, {
        # (old) mediaklikk. date in html.
        'url': 'https://mediaklikk.hu/video/hazajaro-delnyugat-bacska-a-duna-menten-palankatol-doroszloig/',
        'info_dict': {
            'id': '4754129',
            'title': 'Hazajáró, DÉLNYUGAT-BÁCSKA – A Duna mentén Palánkától Doroszlóig',
            'ext': 'mp4',
            'upload_date': '20210901',
            'thumbnail': 'http://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg',
        },
        'skip': 'Webpage redirects to 404 page',
    }, {
        # mediaklikk. date in html.
        'url': 'https://mediaklikk.hu/video/hazajaro-fabova-hegyseg-kishont-koronaja/',
        'info_dict': {
            'id': '6696133',
            'title': 'Hazajáró, Fabova-hegység - Kishont koronája',
            'display_id': 'hazajaro-fabova-hegyseg-kishont-koronaja',
            'ext': 'mp4',
            'upload_date': '20230903',
            'thumbnail': 'https://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg',
        },
        'skip': 'Webpage redirects to 404 page',
    }, {
        # (old) m4sport
        'url': 'https://m4sport.hu/video/2021/08/30/gyemant-liga-parizs/',
        'info_dict': {
            'id': '4754999',
            'title': 'Gyémánt Liga, Párizs',
            'ext': 'mp4',
            'upload_date': '20210830',
            'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/08/vlcsnap-2021-08-30-18h21m20s10-1024x576.jpg',
        },
        'skip': 'Webpage redirects to 404 page',
    }, {
        # m4sport
        'url': 'https://m4sport.hu/sportkozvetitesek/video/2023/09/08/atletika-gyemant-liga-brusszel/',
        'info_dict': {
            'id': '6711136',
            'title': 'Atlétika – Gyémánt Liga, Brüsszel',
            'display_id': 'atletika-gyemant-liga-brusszel',
            'ext': 'mp4',
            'upload_date': '20230908',
            'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-08-22h43m18s691.jpg',
        },
        'skip': 'Webpage redirects to 404 page',
    }, {
        # m4sport with *video/ url and no date
        'url': 'https://m4sport.hu/bl-video/real-madrid-chelsea-1-1/',
        'info_dict': {
            'id': '4492099',
            'title': 'Real Madrid - Chelsea 1-1',
            'display_id': 'real-madrid-chelsea-1-1',
            'ext': 'mp4',
            'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png',
        },
        'skip': 'Webpage redirects to 404 page',
    }, {
        # (old) hirado
        'url': 'https://hirado.hu/videok/felteteleket-szabott-a-fovaros/',
        'info_dict': {
            'id': '4760120',
            'title': 'Feltételeket szabott a főváros',
            'ext': 'mp4',
            'thumbnail': 'http://hirado.hu/wp-content/uploads/sites/4/2021/09/vlcsnap-2021-09-01-20h20m37s165.jpg',
        },
        'skip': 'Webpage redirects to video list page',
    }, {
        # hirado
        'url': 'https://hirado.hu/belfold/video/2023/09/11/marad-az-eves-elszamolas-a-napelemekre-beruhazo-csaladoknal',
        'info_dict': {
            'id': '6716068',
            'title': 'Marad az éves elszámolás a napelemekre beruházó családoknál',
            'display_id': 'marad-az-eves-elszamolas-a-napelemekre-beruhazo-csaladoknal',
            'ext': 'mp4',
            'upload_date': '20230911',
            'thumbnail': 'https://hirado.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-11-09h16m09s882.jpg',
        },
        'skip': 'Webpage redirects to video list page',
    }, {
        # (old) petofilive
        'url': 'https://petofilive.hu/video/2021/06/07/tha-shudras-az-akusztikban/',
        'info_dict': {
            'id': '4571948',
            'title': 'Tha Shudras az Akusztikban',
            'ext': 'mp4',
            'upload_date': '20210607',
            'thumbnail': 'http://petofilive.hu/wp-content/uploads/sites/4/2021/06/vlcsnap-2021-06-07-22h14m23s915-1024x576.jpg',
        },
        'skip': 'Webpage redirects to empty page',
    }, {
        # petofilive
        'url': 'https://petofilive.hu/video/2023/09/09/futball-fesztival-a-margitszigeten/',
        'info_dict': {
            'id': '6713233',
            'title': 'Futball Fesztivál a Margitszigeten',
            'display_id': 'futball-fesztival-a-margitszigeten',
            'ext': 'mp4',
            'upload_date': '20230909',
            'thumbnail': 'https://petofilive.hu/wp-content/uploads/sites/4/2023/09/Clipboard11-2.jpg',
        },
        'skip': 'Webpage redirects to video list page',
    }]

    def _real_extract(self, url):
        """Extract the HLS formats and basic metadata for a single page.

        Steps: download the page, parse the JSON object passed to
        ``mtva_player_manager.player(...)``, request ``player.php`` with that
        object as the query string, read the ``pl.setup(...)`` JSON from the
        response and extract the formats of its HLS playlist entry.

        Returns an info dict with ``id``, ``title``, ``display_id``,
        ``formats``, ``subtitles``, ``upload_date`` and ``thumbnail``.

        Raises ExtractorError when the player JSON has no usable HLS
        playlist URL.
        """
        mobj = self._match_valid_url(url)
        display_id = mobj.group('id')
        webpage = self._download_webpage(url, display_id)

        player_data_str = self._html_search_regex(
            r'mtva_player_manager\.player\(document\.getElementById\(.*\),\s?(\{.*\}).*\);', webpage, 'player data')
        # The embedded JSON is percent-decoded before it is parsed.
        player_data = self._parse_json(player_data_str, display_id, urllib.parse.unquote)
        video_id = str(player_data['contentId'])
        # Title sources in order of preference: player data, OpenGraph, article heading.
        title = player_data.get('title') or self._og_search_title(webpage, fatal=False) or \
            self._html_search_regex(r'<h\d+\b[^>]+\bclass="article_title">([^<]+)<', webpage, 'title')

        # The date segment of the URL is optional, and year/month/day are
        # captured together or not at all. Only build a date string when they
        # matched, instead of formatting None values into 'None-None-None'.
        upload_date = None
        if mobj.group('year'):
            upload_date = unified_strdate('-'.join(mobj.group('year', 'month', 'day')))
        if not upload_date:
            # Fall back to the date printed in the article markup, if any.
            upload_date = unified_strdate(self._html_search_regex(
                r'<p\b[^>]+\bclass="article_date">([^<]+)<', webpage, 'upload date', default=None))

        # The player endpoint receives the page's player data as its query,
        # with the value of 'token' sent under the key 'video'.
        player_data['video'] = player_data.pop('token')
        player_page = self._download_webpage(
            'https://player.mediaklikk.hu/playernew/player.php', video_id,
            query=player_data, headers={'Referer': url})
        player_json = self._search_json(
            r'\bpl\.setup\s*\(', player_page, 'player json', video_id, end_pattern=r'\);')
        # First playlist entry of type 'hls' that carries a valid URL.
        playlist_url = traverse_obj(
            player_json, ('playlist', lambda _, v: v['type'] == 'hls', 'file', {url_or_none}), get_all=False)
        if not playlist_url:
            raise ExtractorError('Unable to extract playlist url')

        formats, subtitles = self._extract_m3u8_formats_and_subtitles(playlist_url, video_id)

        return {
            'id': video_id,
            'title': title,
            'display_id': display_id,
            'formats': formats,
            'subtitles': subtitles,
            'upload_date': upload_date,
            'thumbnail': player_data.get('bgImage') or self._og_search_thumbnail(webpage),
        }