4 from .common
import InfoExtractor
15 class SVTBaseIE(InfoExtractor
):
16 _GEO_COUNTRIES
= ['SE']
18 def _extract_video(self
, video_info
, video_id
):
19 is_live
= dict_get(video_info
, ('live', 'simulcast'), default
=False)
20 m3u8_protocol
= 'm3u8' if is_live
else 'm3u8_native'
23 for vr
in video_info
['videoReferences']:
24 player_type
= vr
.get('playerType') or vr
.get('format')
26 ext
= determine_ext(vurl
)
28 fmts
, subs
= self
._extract
_m
3u8_formats
_and
_subtitles
(
30 ext
='mp4', entry_protocol
=m3u8_protocol
,
31 m3u8_id
=player_type
, fatal
=False)
33 self
._merge
_subtitles
(subs
, target
=subtitles
)
35 formats
.extend(self
._extract
_f
4m
_formats
(
36 vurl
+ '?hdcore=3.3.0', video_id
,
37 f4m_id
=player_type
, fatal
=False))
39 fmts
, subs
= self
._extract
_mpd
_formats
_and
_subtitles
(
40 vurl
, video_id
, mpd_id
=player_type
, fatal
=False)
42 self
._merge
_subtitles
(subs
, target
=subtitles
)
45 'format_id': player_type
,
48 rights
= try_get(video_info
, lambda x
: x
['rights'], dict) or {}
49 if not formats
and rights
.get('geoBlockedSweden'):
50 self
.raise_geo_restricted(
51 'This video is only available in Sweden',
52 countries
=self
._GEO
_COUNTRIES
, metadata_available
=True)
54 subtitle_references
= dict_get(video_info
, ('subtitles', 'subtitleReferences'))
55 if isinstance(subtitle_references
, list):
56 for sr
in subtitle_references
:
57 subtitle_url
= sr
.get('url')
58 subtitle_lang
= sr
.get('language', 'sv')
63 if determine_ext(subtitle_url
) == 'm3u8':
64 # XXX: no way of testing, is it ever hit?
66 subtitles
.setdefault(subtitle_lang
, []).append(sub
)
68 title
= video_info
.get('title')
70 series
= video_info
.get('programTitle')
71 season_number
= int_or_none(video_info
.get('season'))
72 episode
= video_info
.get('episodeTitle')
73 episode_number
= int_or_none(video_info
.get('episodeNumber'))
75 timestamp
= unified_timestamp(rights
.get('validFrom'))
76 duration
= int_or_none(dict_get(video_info
, ('materialLength', 'contentDuration')))
79 video_info
, ('inappropriateForChildren', 'blockedForChildren'),
80 skip_false_values
=False)
82 age_limit
= 18 if adult
else 0
88 'subtitles': subtitles
,
90 'timestamp': timestamp
,
91 'age_limit': age_limit
,
93 'season_number': season_number
,
95 'episode_number': episode_number
,
100 class SVTIE(SVTBaseIE
):
101 _VALID_URL
= r
'https?://(?:www\.)?svt\.se/wd\?(?:.*?&)?widgetId=(?P<widget_id>\d+)&.*?\barticleId=(?P<id>\d+)'
102 _EMBED_REGEX
= [rf
'(?:<iframe src|href)="(?P<url>{_VALID_URL}[^"]*)"']
104 'url': 'http://www.svt.se/wd?widgetId=23991§ionId=541&articleId=2900353&type=embed&contextSectionId=123&autostart=false',
105 'md5': '33e9a5d8f646523ce0868ecfb0eed77d',
109 'title': 'Stjärnorna skojar till det - under SVT-intervjun',
115 def _real_extract(self
, url
):
116 mobj
= self
._match
_valid
_url
(url
)
117 widget_id
= mobj
.group('widget_id')
118 article_id
= mobj
.group('id')
120 info
= self
._download
_json
(
121 f
'http://www.svt.se/wd?widgetId={widget_id}&articleId={article_id}&format=json&type=embed&output=json',
124 info_dict
= self
._extract
_video
(info
['video'], article_id
)
125 info_dict
['title'] = info
['context']['title']
class SVTPlayBaseIE(SVTBaseIE):
    # Matches an inline script assignment of the shape
    #     root["__svtplay"] = {...};
    # terminated by a newline. The quote character is captured in group 1 and
    # must be repeated to close the key (``\1``); any number of leading
    # underscores before ``svtplay`` is accepted. The named group ``json``
    # captures the (non-greedy) object literal.
    _SVTPLAY_RE = r'root\s*\[\s*(["\'])_*svtplay\1\s*\]\s*=\s*(?P<json>{.+?})\s*;\s*\n'
133 class SVTPlayIE(SVTPlayBaseIE):
134 IE_DESC = 'SVT Play
and Öppet arkiv
'
135 _VALID_URL = r'''(?x)
139 https?://(?:www\.)?svt\.se/barnkanalen/barnplay/[^/]+/
141 (?P<svt_id>[^/?#&]+)|
142 https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)
143 (?:.*?(?:modalId|id)=(?P<modal_id>[\da-zA-Z-]+))?
147 'url
': 'https
://www
.svtplay
.se
/video
/30479064',
148 'md5
': '2382036fd6f8c994856c323fe51c426e
',
152 'title
': 'Designdrömmar i Stenungsund
',
153 'timestamp
': 1615770000,
154 'upload_date
': '20210315',
156 'thumbnail
': r're
:^https?
://(?
:.*[\
.-]jpg|www
.svtstatic
.se
/image
/.*)$
',
165 'skip_download
': 'm3u8
',
167 'skip
': 'Episode
is no longer available
',
169 'url
': 'https
://www
.svtplay
.se
/video
/emBxBQj
',
170 'md5
': '2382036fd6f8c994856c323fe51c426e
',
174 'title
': '1. Farlig kryssning
',
175 'timestamp
': 1491019200,
176 'upload_date
': '20170401',
178 'thumbnail
': r're
:^https?
://(?
:.*[\
.-]jpg|www
.svtstatic
.se
/image
/.*)$
',
180 'episode
': '1. Farlig kryssning
',
181 'series
': 'Rederiet
',
187 'skip_download
': 'm3u8
',
190 'url
': 'https
://www
.svtplay
.se
/video
/jz2rYz7
/anders
-hansen
-moter
/james
-fallon?info
=visa
',
194 'title
': 'James Fallon
',
195 'timestamp
': 1673917200,
196 'upload_date
': '20230117',
198 'thumbnail
': r're
:^https?
://(?
:.*[\
.-]jpg|www
.svtstatic
.se
/image
/.*)$
',
200 'episode
': 'James Fallon
',
201 'series
': 'Anders Hansen möter
...',
204 'skip_download
': 'dash
',
207 'url
': 'https
://www
.svtplay
.se
/video
/30479064/husdrommar
/husdrommar
-sasong
-8-designdrommar
-i
-stenungsund?modalId
=8zVbDPA
',
208 'only_matching
': True,
210 'url
': 'https
://www
.svtplay
.se
/video
/30684086/rapport
/rapport
-24-apr
-18-00-7?
id=e72gVpa
',
211 'only_matching
': True,
213 # geo restricted to Sweden
214 'url
': 'http
://www
.oppetarkiv
.se
/video
/5219710/trollflojten
',
215 'only_matching
': True,
217 'url
': 'http
://www
.svtplay
.se
/klipp
/9023742/stopptid
-om
-bjorn
-borg
',
218 'only_matching
': True,
220 'url
': 'https
://www
.svtplay
.se
/kanaler
/svt1
',
221 'only_matching
': True,
223 'url
': 'svt
:1376446-003A
',
224 'only_matching
': True,
226 'url
': 'svt
:14278044',
227 'only_matching
': True,
229 'url
': 'https
://www
.svt
.se
/barnkanalen
/barnplay
/kar
/eWv5MLX
/',
230 'only_matching
': True,
232 'url
': 'svt
:eWv5MLX
',
233 'only_matching
': True,
236 def _extract_by_video_id(self, video_id, webpage=None):
237 data = self._download_json(
238 f'https
://api
.svt
.se
/videoplayer
-api
/video
/{video_id}
',
239 video_id, headers=self.geo_verification_headers())
240 info_dict = self._extract_video(data, video_id)
241 if not info_dict.get('title
'):
242 title = dict_get(info_dict, ('episode
', 'series
'))
243 if not title and webpage:
245 r'\s
*\|\s
*.+?$
', '', self._og_search_title(webpage))
248 info_dict['title
'] = title
251 def _real_extract(self, url):
252 mobj = self._match_valid_url(url)
253 video_id = mobj.group('id')
254 svt_id = mobj.group('svt_id
') or mobj.group('modal_id
')
257 return self._extract_by_video_id(svt_id)
259 webpage = self._download_webpage(url, video_id)
261 data = self._parse_json(
263 self._SVTPLAY_RE, webpage, 'embedded data
', default='{}',
265 video_id, fatal=False)
267 thumbnail = self._og_search_thumbnail(webpage)
270 video_info = try_get(
271 data, lambda x: x['context
']['dispatcher
']['stores
']['VideoTitlePageStore
']['data
']['video
'],
274 info_dict = self._extract_video(video_info, video_id)
276 'title
': data['context
']['dispatcher
']['stores
']['MetaStore
']['title
'],
277 'thumbnail
': thumbnail,
282 data, lambda x: x['statistics
']['dataLake
']['content
']['id'],
286 nextjs_data = self._search_nextjs_data(webpage, video_id, fatal=False)
287 svt_id = traverse_obj(nextjs_data, (
288 'props
', 'urqlState
', ..., 'data
', {json.loads}, 'detailsPageByPath
',
289 'video
', 'svtId
', {str}), get_all=False)
292 svt_id = self._search_regex(
293 (r'<video
[^
>]+data
-video
-id=["\']([\da-zA-Z-]+)',
294 r'<[^>]+\bdata-rt=["\']top
-area
-play
-button
["\'][^>]+\bhref=["\'][^
"\']*video/[\w-]+/[^"\']*\b(?
:modalId|
id)=([\w
-]+)'),
297 info_dict = self._extract_by_video_id(svt_id, webpage)
298 info_dict['thumbnail
'] = thumbnail
303 class SVTSeriesIE(SVTPlayBaseIE):
304 _VALID_URL = r'https?
://(?
:www\
.)?svtplay\
.se
/(?P
<id>[^
/?
&#]+)(?:.+?\btab=(?P<season_slug>[^&#]+))?'
306 'url': 'https://www.svtplay.se/rederiet',
310 'description': 'md5:d9fdfff17f5d8f73468176ecd2836039',
312 'playlist_mincount': 318,
314 'url': 'https://www.svtplay.se/rederiet?tab=season-2-14445680',
316 'id': 'season-2-14445680',
317 'title': 'Rederiet - Säsong 2',
318 'description': 'md5:d9fdfff17f5d8f73468176ecd2836039',
320 'playlist_mincount': 12,
def suitable(cls, url):
    """Tell whether this extractor should handle *url*.

    Returns False whenever SVTIE or SVTPlayIE reports the URL as suitable,
    so those extractors take precedence; otherwise the decision is delegated
    to the parent class's ``suitable``.
    """
    # NOTE(review): the ``cls`` parameter suggests a @classmethod decorator on
    # the preceding line, which is not visible in this excerpt - confirm.
    if SVTIE.suitable(url) or SVTPlayIE.suitable(url):
        return False
    return super().suitable(url)
327 def _real_extract(self
, url
):
328 series_slug
, season_id
= self
._match
_valid
_url
(url
).groups()
330 series
= self
._download
_json
(
331 'https://api.svt.se/contento/graphql', series_slug
,
332 'Downloading series page', query
={
334 listablesBySlug(slugs: ["%s"]) {
335 associatedContent(include: [productionPeriod, season]) {
351 }''' % series_slug
, # noqa: UP031
352 })['data']['listablesBySlug'][0]
357 for season
in series
['associatedContent']:
358 if not isinstance(season
, dict):
361 if season
.get('id') != season_id
:
363 season_name
= season
.get('name')
364 items
= season
.get('items')
365 if not isinstance(items
, list):
368 video
= item
.get('item') or {}
369 content_id
= video
.get('videoSvtId')
370 if not content_id
or not isinstance(content_id
, str):
372 entries
.append(self
.url_result(
373 'svt:' + content_id
, SVTPlayIE
.ie_key(), content_id
))
375 title
= series
.get('name')
376 season_name
= season_name
or season_id
378 if title
and season_name
:
379 title
= f
'{title} - {season_name}'
383 return self
.playlist_result(
384 entries
, season_id
or series
.get('id'), title
,
385 dict_get(series
, ('longDescription', 'shortDescription')))
388 class SVTPageIE(SVTBaseIE
):
389 _VALID_URL
= r
'https?://(?:www\.)?svt\.se/(?:[^/?#]+/)*(?P<id>[^/?&#]+)'
391 'url': 'https://www.svt.se/nyheter/lokalt/skane/viktor-18-forlorade-armar-och-ben-i-sepsis-vill-ateruppta-karaten-och-bli-svetsare',
393 'title': 'Viktor, 18, förlorade armar och ben i sepsis – vill återuppta karaten och bli svetsare',
394 'id': 'viktor-18-forlorade-armar-och-ben-i-sepsis-vill-ateruppta-karaten-och-bli-svetsare',
398 'url': 'https://www.svt.se/nyheter/lokalt/skane/forsvarsmakten-om-trafikkaoset-pa-e22-kunde-inte-varit-dar-snabbare',
401 'title': 'Försvarsmakten om trafikkaoset på E22: Kunde inte varit där snabbare',
405 'timestamp': 1704370009,
406 'episode': 'Försvarsmakten om trafikkaoset på E22: Kunde inte varit där snabbare',
407 'series': 'Lokala Nyheter Skåne',
408 'upload_date': '20240104',
411 'skip_download': True,
414 'url': 'https://www.svt.se/nyheter/svtforum/2023-tungt-ar-for-svensk-media',
416 'title': '2023 tungt år för svensk media',
422 'timestamp': 1702980479,
423 'upload_date': '20231219',
424 'episode': 'Mediestudier',
427 'skip_download': True,
430 'url': 'https://www.svt.se/sport/ishockey/bakom-masken-lehners-kamp-mot-mental-ohalsa',
433 'title': 'Bakom masken – Lehners kamp mot mental ohälsa',
436 'skip': 'Video is gone',
438 'url': 'https://www.svt.se/nyheter/utrikes/svenska-andrea-ar-en-mil-fran-branderna-i-kalifornien',
441 'title': 'Svenska Andrea redo att fly sitt hem i Kalifornien',
444 'skip': 'Video is gone',
447 'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
451 'title': 'Stjärnorna skojar till det - under SVT-intervjun',
455 'skip': 'Video is gone',
457 'url': 'https://www.svt.se/nyheter/lokalt/vast/svt-testar-tar-nagon-upp-skrapet-1',
458 'only_matching': True,
460 'url': 'https://www.svt.se/vader/manadskronikor/maj2018',
461 'only_matching': True,
def suitable(cls, url):
    """Decide whether this extractor accepts *url*.

    A URL already claimed by SVTIE or SVTPlayIE is rejected (False is
    returned); any other URL is judged by the parent class's ``suitable``.
    """
    # NOTE(review): presumably wrapped in @classmethod just above this
    # definition; the decorator line is not part of this excerpt - confirm.
    claimed_elsewhere = SVTIE.suitable(url) or SVTPlayIE.suitable(url)
    if claimed_elsewhere:
        return False
    return super().suitable(url)
468 def _real_extract(self
, url
):
469 display_id
= self
._match
_id
(url
)
471 webpage
= self
._download
_webpage
(url
, display_id
)
472 title
= self
._og
_search
_title
(webpage
)
474 urql_state
= self
._search
_json
(
475 r
'window\.svt\.(?:nyh\.)?urqlState\s*=', webpage
, 'json data', display_id
)
477 data
= traverse_obj(urql_state
, (..., 'data', {str}
, {json
.loads
}), get_all
=False) or {}
480 for video_id
in set(traverse_obj(data
, (
481 'page', (('topMedia', 'svtId'), ('body', ..., 'video', 'svtId')), {str}
,
483 info
= self
._extract
_video
(
484 self
._download
_json
(f
'https://api.svt.se/video/{video_id}', video_id
), video_id
)
485 info
['title'] = title
488 return self
.playlist_result(entries(), display_id
, title
)