[ie/facebook] Support more groups URLs (#11576)
[yt-dlp3.git] / yt_dlp / extractor / bibeltv.py
blob ad00245def72205bb94ed954a76bf1ffcf6a90ae
2 from .common import InfoExtractor
3 from ..utils import (
4 ExtractorError,
5 clean_html,
6 determine_ext,
7 format_field,
8 int_or_none,
9 js_to_json,
10 orderedSet,
11 parse_iso8601,
12 traverse_obj,
13 url_or_none,
class BibelTVBaseIE(InfoExtractor):
    """Shared helpers for the BibelTV extractors: format discovery and metadata mapping."""

    _GEO_COUNTRIES = ['AT', 'CH', 'DE']
    _GEO_BYPASS = False

    API_URL = 'https://www.bibeltv.de/mediathek/api'
    # Sent verbatim as the `Authorization` header of the video API request
    AUTH_TOKEN = 'j88bRXY8DsEqJ9xmTdWhrByVi5Hm'

    def _extract_formats_and_subtitles(self, data, crn_id, *, is_live=False):
        """Collect formats and subtitles from every entry of `data` that has a valid `src` URL.

        The kind of each source is decided by its file extension: `m3u8` and
        `mpd` manifests are expanded, `mp4` is added as a direct format, and
        anything else only triggers a warning.

        @param data     Iterable of source descriptions, each carrying a `src` URL
        @param crn_id   Identifier passed on to the manifest downloaders
        @param is_live  Forwarded as `live` to the m3u8 extraction
        @returns        Tuple `(formats, subtitles)`
        """
        formats = []
        subtitles = {}
        for media_url in traverse_obj(data, (..., 'src', {url_or_none})):
            media_ext = determine_ext(media_url)
            if media_ext == 'm3u8':
                m3u8_formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
                    media_url, crn_id, live=is_live)
                formats.extend(m3u8_formats)
                # Merge language by language; a plain dict.update() would replace
                # the tracks already collected for that language from another source
                self._merge_subtitles(m3u8_subs, target=subtitles)
            elif media_ext == 'mpd':
                mpd_formats, mpd_subs = self._extract_mpd_formats_and_subtitles(media_url, crn_id)
                formats.extend(mpd_formats)
                self._merge_subtitles(mpd_subs, target=subtitles)
            elif media_ext == 'mp4':
                formats.append({'url': media_url})
            else:
                self.report_warning(f'Unknown format {media_ext!r}')

        return formats, subtitles

    @staticmethod
    def _extract_base_info(data):
        """Map a video description dict onto the common info-dict fields.

        `crn` is mandatory (a missing key raises KeyError); every other field
        is optional and simply left out when absent.
        """
        return {
            'id': data['crn'],
            **traverse_obj(data, {
                'title': 'title',
                'description': 'description',
                # Scaled down by 1000 -- presumably the site reports milliseconds
                'duration': ('duration', {int_or_none(scale=1000)}),
                'timestamp': ('schedulingStart', {parse_iso8601}),
                'season_number': 'seasonNumber',
                'episode_number': 'episodeNumber',
                'view_count': 'viewCount',
                'like_count': 'likeCount',
            }),
            # orderedSet drops duplicate thumbnail entries while keeping their order
            'thumbnails': orderedSet(traverse_obj(data, ('images', ..., {
                'url': ('url', {url_or_none}),
            }))),
        }

    def _extract_url_info(self, data):
        """Build a `url`-type result pointing at the video page named by `data['slug']`."""
        return {
            '_type': 'url',
            'url': format_field(data, 'slug', 'https://www.bibeltv.de/mediathek/videos/%s'),
            **self._extract_base_info(data),
        }

    def _extract_video_info(self, data):
        """Build a full `video`-type result for `data`, querying the API for its formats.

        The API request is non-fatal: when it fails, the result is returned
        with empty formats and subtitles.
        """
        crn_id = data['crn']

        if data.get('drm'):
            self.report_drm(crn_id)

        json_data = self._download_json(
            format_field(data, 'id', f'{self.API_URL}/video/%s'), crn_id,
            headers={'Authorization': self.AUTH_TOKEN}, fatal=False,
            errnote='No formats available') or {}

        formats, subtitles = self._extract_formats_and_subtitles(
            traverse_obj(json_data, ('video', 'videoUrls', ...)), crn_id)

        return {
            '_type': 'video',
            **self._extract_base_info(data),
            'formats': formats,
            'subtitles': subtitles,
        }
class BibelTVVideoIE(BibelTVBaseIE):
    """Extractor for a single video page under /mediathek/videos/."""

    IE_DESC = 'BibelTV single video'
    _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/videos/(?P<id>\d+)[\w-]+'
    IE_NAME = 'bibeltv:video'

    _TESTS = [{
        'url': 'https://www.bibeltv.de/mediathek/videos/344436-alte-wege',
        'md5': 'ec1c07efe54353780512e8a4103b612e',
        'info_dict': {
            'id': '344436',
            'ext': 'mp4',
            'title': 'Alte Wege',
            'description': 'md5:2f4eb7294c9797a47b8fd13cccca22e9',
            'timestamp': 1677877071,
            'duration': 150.0,
            'upload_date': '20230303',
            'thumbnail': r're:https://bibeltv\.imgix\.net/[\w-]+\.jpg',
            'episode': 'Episode 1',
            'episode_number': 1,
            'view_count': int,
            'like_count': int,
        },
        'params': {
            'format': '6',
        },
    }]

    def _real_extract(self, url):
        """Download the page, read its Next.js data and extract the first listed video."""
        crn_id = self._match_id(url)
        webpage = self._download_webpage(url, crn_id)
        nextjs_data = self._search_nextjs_data(webpage, crn_id)

        # Only the first entry of the page's video list is used
        first_video = traverse_obj(
            nextjs_data, ('props', 'pageProps', 'videoPageData', 'videos', 0, {dict}))
        if first_video:
            return self._extract_video_info(first_video)

        raise ExtractorError('Missing video data.')
class BibelTVSeriesIE(BibelTVBaseIE):
    """Extractor turning a series page under /mediathek/serien/ into a playlist."""

    IE_DESC = 'BibelTV series playlist'
    _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/serien/(?P<id>\d+)[\w-]+'
    IE_NAME = 'bibeltv:series'

    _TESTS = [{
        'url': 'https://www.bibeltv.de/mediathek/serien/333485-ein-wunder-fuer-jeden-tag',
        'playlist_mincount': 400,
        'info_dict': {
            'id': '333485',
            'title': 'Ein Wunder für jeden Tag',
            'description': 'Tägliche Kurzandacht mit Déborah Rosenkranz.',
        },
    }]

    def _real_extract(self, url):
        """Read the series description from the page's Next.js data and list its videos."""
        crn_id = self._match_id(url)
        page_data = self._search_nextjs_data(self._download_webpage(url, crn_id), crn_id)

        series_data = traverse_obj(page_data, ('props', 'pageProps', 'seriePageData', {dict}))
        if not series_data:
            raise ExtractorError('Missing series data.')

        # Each video dict becomes a `url`-type entry that is resolved later
        entries = traverse_obj(series_data, ('videos', ..., {dict}, {self._extract_url_info}))
        playlist_title = series_data.get('title')
        playlist_description = clean_html(series_data.get('description'))
        return self.playlist_result(entries, crn_id, playlist_title, playlist_description)
class BibelTVLiveIE(BibelTVBaseIE):
    """Extractor for the live stream pages under /livestreams/."""

    IE_DESC = 'BibelTV live program'
    _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/livestreams/(?P<id>[\w-]+)'
    IE_NAME = 'bibeltv:live'

    _TESTS = [{
        'url': 'https://www.bibeltv.de/livestreams/bibeltv/',
        'info_dict': {
            'id': 'bibeltv',
            'ext': 'mp4',
            'title': 're:Bibel TV',
            'live_status': 'is_live',
            'thumbnail': 'https://streampreview.bibeltv.de/bibeltv.webp',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.bibeltv.de/livestreams/impuls/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Locate the stream description embedded in the page and extract its formats."""
        stream_id = self._match_id(url)
        webpage = self._download_webpage(url, stream_id)

        def unescape_to_json(jstring):
            # The object is matched with backslash-escaped quotes; strip the
            # escaping before handing the text to js_to_json
            return js_to_json(jstring.replace('\\"', '"'))

        stream_data = self._search_json(
            r'\\"video\\":', webpage, 'bibeltvData', stream_id,
            transform_source=unescape_to_json)

        sources = traverse_obj(stream_data, ('src', ...))
        formats, subtitles = self._extract_formats_and_subtitles(
            sources, stream_id, is_live=True)

        return {
            'id': stream_id,
            'title': stream_data.get('title'),
            'thumbnail': stream_data.get('poster'),
            'is_live': True,
            'formats': formats,
            'subtitles': subtitles,
        }