4 from .common
import InfoExtractor
5 from ..networking
.exceptions
import HTTPError
11 get_element_text_and_html_by_tag
,
15 from ..utils
.traversal
import traverse_obj
18 class BundestagIE(InfoExtractor
):
20 r
'https?://dbtg\.tv/[cf]vid/(?P<id>\d+)',
21 r
'https?://www\.bundestag\.de/mediathek/?\?(?:[^#]+&)?videoid=(?P<id>\d+)',
24 'url': 'https://dbtg.tv/cvid/7605304',
28 'title': '145. Sitzung vom 15.12.2023, TOP 24 Barrierefreiheit',
29 'description': 'md5:321a9dc6bdad201264c0045efc371561',
32 'url': 'https://www.bundestag.de/mediathek?videoid=7602120&url=L21lZGlhdGhla292ZXJsYXk=&mod=mediathek',
36 'title': '130. Sitzung vom 18.10.2023, TOP 1 Befragung der Bundesregierung',
37 'description': 'Befragung der Bundesregierung',
40 'url': 'https://www.bundestag.de/mediathek?videoid=7604941#url=L21lZGlhdGhla292ZXJsYXk/dmlkZW9pZD03NjA0OTQx&mod=mediathek',
41 'only_matching': True,
43 'url': 'http://dbtg.tv/fvid/3594346',
44 'only_matching': True,
47 _OVERLAY_URL
= 'https://www.bundestag.de/mediathekoverlay'
48 _INSTANCE_FORMAT
= 'https://cldf-wzw-od.r53.cdn.tv1.eu/13014bundestagod/_definst_/13014bundestag/ondemand/3777parlamentsfernsehen/archiv/app144277506/145293313/{0}/{0}_playlist.smil/playlist.m3u8'
50 _SHARE_URL
= 'https://webtv.bundestag.de/player/macros/_x_s-144277506/shareData.json?contentId='
51 _SHARE_AUDIO_REGEX
= r
'/\d+_(?P<codec>\w+)_(?P<bitrate>\d+)kb_(?P<channels>\w+)_\w+_\d+\.(?P<ext>\w+)'
52 _SHARE_VIDEO_REGEX
= r
'/\d+_(?P<codec>\w+)_(?P<width>\w+)_(?P<height>\w+)_(?P<bitrate>\d+)kb_\w+_\w+_\d+\.(?P<ext>\w+)'
54 def _bt_extract_share_formats(self
, video_id
):
55 share_data
= self
._download
_json
(
56 f
'{self._SHARE_URL}{video_id}', video_id
, note
='Downloading share format JSON')
57 if traverse_obj(share_data
, ('status', 'code', {int}
)) != 1:
58 self
.report_warning(format_field(
59 share_data
, [('status', 'message', {str}
)],
60 'Share API response: %s', default
='Unknown Share API Error')
61 + bug_reports_message())
64 for name
, url
in share_data
.items():
65 if not isinstance(name
, str) or not url_or_none(url
):
68 elif name
.startswith('audio'):
69 match
= re
.search(self
._SHARE
_AUDIO
_REGEX
, url
)
74 **traverse_obj(match
, {
76 'audio_channels': ('channels', {{'mono': 1, 'stereo': 2}.get
}),
77 'abr': ('bitrate', {int_or_none}
),
82 elif name
.startswith('download'):
83 match
= re
.search(self
._SHARE
_VIDEO
_REGEX
, url
)
87 **traverse_obj(match
, {
89 'tbr': ('bitrate', {int_or_none}
),
90 'width': ('width', {int_or_none}
),
91 'height': ('height', {int_or_none}
),
96 def _real_extract(self
, url
):
97 video_id
= self
._match
_id
(url
)
99 result
= {'id': video_id
, 'formats': formats
}
102 formats
.extend(self
._extract
_m
3u8_formats
(
103 self
._INSTANCE
_FORMAT
.format(video_id
), video_id
, m3u8_id
='instance'))
104 except ExtractorError
as error
:
105 if isinstance(error
.cause
, HTTPError
) and error
.cause
.status
== 404:
106 raise ExtractorError('Could not find video id', expected
=True)
107 self
.report_warning(f
'Error extracting hls formats: {error}', video_id
)
108 formats
.extend(self
._bt
_extract
_share
_formats
(video_id
))
110 self
.raise_no_formats('Could not find suitable formats', video_id
=video_id
)
112 result
.update(traverse_obj(self
._download
_webpage
(
113 self
._OVERLAY
_URL
, video_id
,
114 query
={'videoid': video_id
, 'view': 'main'},
115 note
='Downloading metadata overlay', fatal
=False,
118 {functools
.partial(get_element_text_and_html_by_tag
, 'h3')}, 0,
119 {functools
.partial(re
.sub
, r
'<span[^>]*>[^<]+</span>', '')}, {clean_html}
),
120 'description': ({functools
.partial(get_element_text_and_html_by_tag
, 'p')}, 0, {clean_html}
),