import urllib.parse

from .common import InfoExtractor
from ..compat import compat_urllib_parse_urlparse
from ..utils import (
    int_or_none,
    parse_codecs,
    parse_qs,
    try_get,
    urljoin,
)


16 return compat_urllib_parse_urlparse(src_url
).path
.split('/')[-1]
class SeznamZpravyIE(InfoExtractor):
    """Extractor for seznamzpravy.cz iframe player URLs.

    Metadata is read from the player URL's query string; the formats come
    from the JSON document referenced by its ``src`` parameter.
    """

    _VALID_URL = r'https?://(?:www\.)?seznamzpravy\.cz/iframe/player\?.*\bsrc='
    _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?seznamzpravy\.cz/iframe/player\?.*?)\1']
    # NOTE(review): only the expectation keys visible in the recovered source
    # are listed below; confirm the full set against the canonical test data.
    _TESTS = [{
        'url': 'https://www.seznamzpravy.cz/iframe/player?duration=241&serviceSlug=zpravy&src=https%3A%2F%2Fv39-a.sdn.szn.cz%2Fv_39%2Fvmd%2F5999c902ea707c67d8e267a9%3Ffl%3Dmdk%2C432f65a0%7C&itemType=video&autoPlay=false&title=Sv%C4%9Bt%20bez%20obalu%3A%20%C4%8Ce%C5%A1t%C3%AD%20voj%C3%A1ci%20na%20mis%C3%ADch%20(kr%C3%A1tk%C3%A1%20verze)&series=Sv%C4%9Bt%20bez%20obalu&serviceName=Seznam%20Zpr%C3%A1vy&poster=%2F%2Fd39-a.sdn.szn.cz%2Fd_39%2Fc_img_F_I%2FR5puJ.jpeg%3Ffl%3Dcro%2C0%2C0%2C1920%2C1080%7Cres%2C1200%2C%2C1%7Cjpg%2C80%2C%2C1&width=1920&height=1080&cutFrom=0&cutTo=0&splVersion=VOD&contentId=170889&contextId=35990&showAdvert=true&collocation=&autoplayPossible=true&embed=&isVideoTooShortForPreroll=false&isVideoTooLongForPostroll=true&videoCommentOpKey=&videoCommentId=&version=4.0.76&dotService=zpravy&gemiusPrismIdentifier=bVc1ZIb_Qax4W2v5xOPGpMeCP31kFfrTzj0SqPTLh_b.Z7&zoneIdPreroll=seznam.pack.videospot&skipOffsetPreroll=5&sectionPrefixPreroll=%2Fzpravy',
        'info_dict': {
            'title': 'Svět bez obalu: Čeští vojáci na misích (krátká verze)',
            'thumbnail': r're:^https?://.*\.jpe?g',
            'series': 'Svět bez obalu',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.seznamzpravy.cz/iframe/player?duration=null&serviceSlug=zpravy&src=https%3A%2F%2Flive-a.sdn.szn.cz%2Fv_39%2F59e468fe454f8472a96af9fa%3Ffl%3Dmdk%2C5c1e2840%7C&itemType=livevod&autoPlay=false&title=P%C5%99edseda%20KDU-%C4%8CSL%20Pavel%20B%C4%9Blobr%C3%A1dek%20ve%20volebn%C3%AD%20V%C3%BDzv%C4%9B%20Seznamu&series=V%C3%BDzva&serviceName=Seznam%20Zpr%C3%A1vy&poster=%2F%2Fd39-a.sdn.szn.cz%2Fd_39%2Fc_img_G_J%2FjTBCs.jpeg%3Ffl%3Dcro%2C0%2C0%2C1280%2C720%7Cres%2C1200%2C%2C1%7Cjpg%2C80%2C%2C1&width=16&height=9&cutFrom=0&cutTo=0&splVersion=VOD&contentId=185688&contextId=38489&showAdvert=true&collocation=&hideFullScreen=false&hideSubtitles=false&embed=&isVideoTooShortForPreroll=false&isVideoTooShortForPreroll2=false&isVideoTooLongForPostroll=false&fakePostrollZoneID=seznam.clanky.zpravy.preroll&fakePrerollZoneID=seznam.clanky.zpravy.preroll&videoCommentId=&trim=default_16x9&noPrerollVideoLength=30&noPreroll2VideoLength=undefined&noMidrollVideoLength=0&noPostrollVideoLength=999999&autoplayPossible=true&version=5.0.41&dotService=zpravy&gemiusPrismIdentifier=zD3g7byfW5ekpXmxTVLaq5Srjw5i4hsYo0HY1aBwIe..27&zoneIdPreroll=seznam.pack.videospot&skipOffsetPreroll=5&sectionPrefixPreroll=%2Fzpravy%2Fvyzva&zoneIdPostroll=seznam.pack.videospot&skipOffsetPostroll=5&sectionPrefixPostroll=%2Fzpravy%2Fvyzva&regression=false',
        'info_dict': {
            'title': 'Předseda KDU-ČSL Pavel Bělobrádek ve volební Výzvě Seznamu',
            'thumbnail': r're:^https?://.*\.jpe?g',
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _extract_sdn_formats(self, sdn_url, video_id):
        """Collect the formats described by the JSON document at *sdn_url*.

        If the document carries a ``Location`` key, the JSON at that URL is
        downloaded and used instead. Progressive entries are read from
        ``data.mp4``; DASH and HLS manifests are read from ``pls``.

        Returns a list of format dicts (possibly empty).
        """
        sdn_data = self._download_json(sdn_url, video_id)

        if sdn_data.get('Location'):
            # Relative URLs below are resolved against the redirected URL.
            sdn_url = sdn_data['Location']
            sdn_data = self._download_json(sdn_url, video_id)

        formats = []
        mp4_formats = try_get(sdn_data, lambda x: x['data']['mp4'], dict) or {}
        for format_id, format_data in mp4_formats.items():
            relative_url = format_data.get('url')
            if not relative_url:
                # Nothing to download for this entry.
                continue

            try:
                width, height = format_data.get('resolution')
            except (TypeError, ValueError):
                # 'resolution' is missing or is not a two-item sequence.
                width, height = None, None

            f = {
                'url': urljoin(sdn_url, relative_url),
                'format_id': 'http-%s' % format_id,
                'tbr': int_or_none(format_data.get('bandwidth'), scale=1000),
                'width': int_or_none(width),
                'height': int_or_none(height),
            }
            f.update(parse_codecs(format_data.get('codec')))
            formats.append(f)

        pls = sdn_data.get('pls', {})

        def get_url(format_id):
            # Yields None unless pls[format_id]['url'] exists and is a string.
            return try_get(pls, lambda x: x[format_id]['url'], str)

        dash_rel_url = get_url('dash')
        if dash_rel_url:
            formats.extend(self._extract_mpd_formats(
                urljoin(sdn_url, dash_rel_url), video_id, mpd_id='dash',
                fatal=False))

        hls_rel_url = get_url('hls')
        if hls_rel_url:
            formats.extend(self._extract_m3u8_formats(
                urljoin(sdn_url, hls_rel_url), video_id, ext='mp4',
                m3u8_id='hls', fatal=False))

        return formats

    def _real_extract(self, url):
        """Build the info dict from the query parameters of the player *url*.

        ``src`` and ``title`` are required (a missing one raises ``KeyError``);
        ``contentId``, ``duration``, ``series`` and ``poster`` are optional.
        """
        params = parse_qs(url)

        src = params['src'][0]
        title = params['title'][0]
        # Fall back to the last path segment of src when contentId is absent.
        video_id = params.get('contentId', [_raw_id(src)])[0]
        formats = self._extract_sdn_formats(src + 'spl2,2,VOD', video_id)

        duration = int_or_none(params.get('duration', [None])[0])
        series = params.get('series', [None])[0]
        thumbnail = params.get('poster', [None])[0]

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'series': series,
            'formats': formats,
        }


class SeznamZpravyArticleIE(InfoExtractor):
    """Extractor for Seznam Zprávy article pages.

    An article is returned as a playlist of the SeznamZpravyIE player embeds
    found in its HTML.
    """

    _VALID_URL = r'https?://(?:www\.)?(?:seznam\.cz/zpravy|seznamzpravy\.cz)/clanek/(?:[^/?#&]+)-(?P<id>\d+)'
    _API_URL = 'https://apizpravy.seznam.cz/'

    # NOTE(review): only the expectation keys visible in the recovered source
    # are listed below; confirm the full set against the canonical test data.
    _TESTS = [{
        # two videos on one page, with SDN URL
        'url': 'https://www.seznamzpravy.cz/clanek/jejich-svet-na-nas-utoci-je-lepsi-branit-se-na-jejich-pisecku-rika-reziser-a-major-v-zaloze-marhoul-35990',
        'info_dict': {
            'title': 'md5:6011c877a36905f28f271fcd8dcdb0f2',
            'description': 'md5:933f7b06fa337a814ba199d3596d27ba',
        },
    }, {
        # video with live stream URL
        'url': 'https://www.seznam.cz/zpravy/clanek/znovu-do-vlady-s-ano-pavel-belobradek-ve-volebnim-specialu-seznamu-38489',
        'info_dict': {
            'title': 'md5:8fa1afdc36fd378cf0eba2b74c5aca60',
            'description': 'md5:428e7926a1a81986ec7eb23078004fb4',
        },
    }]

    def _real_extract(self, url):
        """Return a playlist of the player embeds found on the article page.

        Title and description are taken from the page's JSON-LD when
        available, otherwise from its Open Graph metadata.
        """
        article_id = self._match_id(url)

        webpage = self._download_webpage(url, article_id)

        info = self._search_json_ld(webpage, article_id, default={})

        title = info.get('title') or self._og_search_title(webpage, fatal=False)
        description = info.get('description') or self._og_search_description(webpage)

        return self.playlist_result([
            self.url_result(entry_url, ie=SeznamZpravyIE.ie_key())
            for entry_url in SeznamZpravyIE._extract_embed_urls(url, webpage)],
            article_id, title, description)