3 from .common
import InfoExtractor
12 class MediaKlikkIE(InfoExtractor
):
13 _VALID_URL
= r
'''(?x)https?://(?:www\.)?
14 (?:mediaklikk|m4sport|hirado|petofilive)\.hu/.*?(?:videok?|cikk)/
15 (?:(?P<year>[0-9]{4})/(?P<month>[0-9]{1,2})/(?P<day>[0-9]{1,2})/)?
19 # (old) mediaklikk. date in html.
20 'url': 'https://mediaklikk.hu/video/hazajaro-delnyugat-bacska-a-duna-menten-palankatol-doroszloig/',
23 'title': 'Hazajáró, DÉLNYUGAT-BÁCSKA – A Duna mentén Palánkától Doroszlóig',
25 'upload_date': '20210901',
26 'thumbnail': 'http://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg',
28 'skip': 'Webpage redirects to 404 page',
30 # mediaklikk. date in html.
31 'url': 'https://mediaklikk.hu/video/hazajaro-fabova-hegyseg-kishont-koronaja/',
34 'title': 'Hazajáró, Fabova-hegység - Kishont koronája',
35 'display_id': 'hazajaro-fabova-hegyseg-kishont-koronaja',
37 'upload_date': '20230903',
38 'thumbnail': 'https://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg',
42 'url': 'https://m4sport.hu/video/2021/08/30/gyemant-liga-parizs/',
45 'title': 'Gyémánt Liga, Párizs',
47 'upload_date': '20210830',
48 'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/08/vlcsnap-2021-08-30-18h21m20s10-1024x576.jpg',
50 'skip': 'Webpage redirects to 404 page',
53 'url': 'https://m4sport.hu/sportkozvetitesek/video/2023/09/08/atletika-gyemant-liga-brusszel/',
56 'title': 'Atlétika – Gyémánt Liga, Brüsszel',
57 'display_id': 'atletika-gyemant-liga-brusszel',
59 'upload_date': '20230908',
60 'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-08-22h43m18s691.jpg',
63 # m4sport with *video/ url and no date
64 'url': 'https://m4sport.hu/bl-video/real-madrid-chelsea-1-1/',
67 'title': 'Real Madrid - Chelsea 1-1',
68 'display_id': 'real-madrid-chelsea-1-1',
70 'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png',
74 'url': 'https://hirado.hu/videok/felteteleket-szabott-a-fovaros/',
77 'title': 'Feltételeket szabott a főváros',
79 'thumbnail': 'http://hirado.hu/wp-content/uploads/sites/4/2021/09/vlcsnap-2021-09-01-20h20m37s165.jpg',
81 'skip': 'Webpage redirects to video list page',
84 'url': 'https://hirado.hu/belfold/video/2023/09/11/marad-az-eves-elszamolas-a-napelemekre-beruhazo-csaladoknal',
87 'title': 'Marad az éves elszámolás a napelemekre beruházó családoknál',
88 'display_id': 'marad-az-eves-elszamolas-a-napelemekre-beruhazo-csaladoknal',
90 'upload_date': '20230911',
91 'thumbnail': 'https://hirado.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-11-09h16m09s882.jpg',
95 'url': 'https://petofilive.hu/video/2021/06/07/tha-shudras-az-akusztikban/',
98 'title': 'Tha Shudras az Akusztikban',
100 'upload_date': '20210607',
101 'thumbnail': 'http://petofilive.hu/wp-content/uploads/sites/4/2021/06/vlcsnap-2021-06-07-22h14m23s915-1024x576.jpg',
103 'skip': 'Webpage redirects to empty page',
106 'url': 'https://petofilive.hu/video/2023/09/09/futball-fesztival-a-margitszigeten/',
109 'title': 'Futball Fesztivál a Margitszigeten',
110 'display_id': 'futball-fesztival-a-margitszigeten',
112 'upload_date': '20230909',
113 'thumbnail': 'https://petofilive.hu/wp-content/uploads/sites/4/2023/09/Clipboard11-2.jpg',
117 def _real_extract(self
, url
):
118 mobj
= self
._match
_valid
_url
(url
)
119 display_id
= mobj
.group('id')
120 webpage
= self
._download
_webpage
(url
, display_id
)
122 player_data_str
= self
._html
_search
_regex
(
123 r
'mtva_player_manager\.player\(document.getElementById\(.*\),\s?(\{.*\}).*\);', webpage
, 'player data')
124 player_data
= self
._parse
_json
(player_data_str
, display_id
, urllib
.parse
.unquote
)
125 video_id
= str(player_data
['contentId'])
126 title
= player_data
.get('title') or self
._og
_search
_title
(webpage
, fatal
=False) or \
127 self
._html
_search
_regex
(r
'<h\d+\b[^>]+\bclass="article_title">([^<]+)<', webpage
, 'title')
129 upload_date
= unified_strdate(
130 '{}-{}-{}'.format(mobj
.group('year'), mobj
.group('month'), mobj
.group('day')))
132 upload_date
= unified_strdate(self
._html
_search
_regex
(
133 r
'<p+\b[^>]+\bclass="article_date">([^<]+)<', webpage
, 'upload date', default
=None))
135 player_data
['video'] = player_data
.pop('token')
136 player_page
= self
._download
_webpage
(
137 'https://player.mediaklikk.hu/playernew/player.php', video_id
,
138 query
=player_data
, headers
={'Referer': url
})
139 player_json
= self
._search
_json
(
140 r
'\bpl\.setup\s*\(', player_page
, 'player json', video_id
, end_pattern
=r
'\);')
141 playlist_url
= traverse_obj(
142 player_json
, ('playlist', lambda _
, v
: v
['type'] == 'hls', 'file', {url_or_none}
), get_all
=False)
144 raise ExtractorError('Unable to extract playlist url')
146 formats
= self
._extract
_wowza
_formats
(
147 playlist_url
, video_id
, skip_protocols
=['f4m', 'smil', 'dash'])
152 'display_id': display_id
,
154 'upload_date': upload_date
,
155 'thumbnail': player_data
.get('bgImage') or self
._og
_search
_thumbnail
(webpage
),