yt_dlp/extractor/bibeltv.py

   1 import functools
   2
   3 from .common import InfoExtractor
   4 from ..utils import (
   5     ExtractorError,
   6     clean_html,
   7     determine_ext,
   8     format_field,
   9     int_or_none,
  10     js_to_json,
  11     orderedSet,
  12     parse_iso8601,
  13     traverse_obj,
  14     url_or_none,
  15 )
  16
  17
  18 class BibelTVBaseIE(InfoExtractor):
  19     _GEO_COUNTRIES = ['AT', 'CH', 'DE']
  20     _GEO_BYPASS = False
  21
  22     API_URL = 'https://www.bibeltv.de/mediathek/api'
  23     AUTH_TOKEN = 'j88bRXY8DsEqJ9xmTdWhrByVi5Hm'
  24
  25     def _extract_formats_and_subtitles(self, data, crn_id, *, is_live=False):
  26         formats = []
  27         subtitles = {}
  28         for media_url in traverse_obj(data, (..., 'src', {url_or_none})):
  29             media_ext = determine_ext(media_url)
  30             if media_ext == 'm3u8':
  31                 m3u8_formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
  32                     media_url, crn_id, live=is_live)
  33                 formats.extend(m3u8_formats)
  34                 subtitles.update(m3u8_subs)
  35             elif media_ext == 'mpd':
  36                 mpd_formats, mpd_subs = self._extract_mpd_formats_and_subtitles(media_url, crn_id)
  37                 formats.extend(mpd_formats)
  38                 subtitles.update(mpd_subs)
  39             elif media_ext == 'mp4':
  40                 formats.append({'url': media_url})
  41             else:
  42                 self.report_warning(f'Unknown format {media_ext!r}')
  43
  44         return formats, subtitles
  45
  46     @staticmethod
  47     def _extract_base_info(data):
  48         return {
  49             'id': data['crn'],
  50             **traverse_obj(data, {
  51                 'title': 'title',
  52                 'description': 'description',
  53                 'duration': ('duration', {functools.partial(int_or_none, scale=1000)}),
  54                 'timestamp': ('schedulingStart', {parse_iso8601}),
  55                 'season_number': 'seasonNumber',
  56                 'episode_number': 'episodeNumber',
  57                 'view_count': 'viewCount',
  58                 'like_count': 'likeCount',
  59             }),
  60             'thumbnails': orderedSet(traverse_obj(data, ('images', ..., {
  61                 'url': ('url', {url_or_none}),
  62             }))),
  63         }
  64
  65     def _extract_url_info(self, data):
  66         return {
  67             '_type': 'url',
  68             'url': format_field(data, 'slug', 'https://www.bibeltv.de/mediathek/videos/%s'),
  69             **self._extract_base_info(data),
  70         }
  71
  72     def _extract_video_info(self, data):
  73         crn_id = data['crn']
  74
  75         if data.get('drm'):
  76             self.report_drm(crn_id)
  77
  78         json_data = self._download_json(
  79             format_field(data, 'id', f'{self.API_URL}/video/%s'), crn_id,
  80             headers={'Authorization': self.AUTH_TOKEN}, fatal=False,
  81             errnote='No formats available') or {}
  82
  83         formats, subtitles = self._extract_formats_and_subtitles(
  84             traverse_obj(json_data, ('video', 'videoUrls', ...)), crn_id)
  85
  86         return {
  87             '_type': 'video',
  88             **self._extract_base_info(data),
  89             'formats': formats,
  90             'subtitles': subtitles,
  91         }
  92
  93
  94 class BibelTVVideoIE(BibelTVBaseIE):
  95     IE_DESC = 'BibelTV single video'
  96     _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/videos/(?P<id>\d+)[\w-]+'
  97     IE_NAME = 'bibeltv:video'
  98
  99     _TESTS = [{
 100         'url': 'https://www.bibeltv.de/mediathek/videos/344436-alte-wege',
 101         'md5': 'ec1c07efe54353780512e8a4103b612e',
 102         'info_dict': {
 103             'id': '344436',
 104             'ext': 'mp4',
 105             'title': 'Alte Wege',
 106             'description': 'md5:2f4eb7294c9797a47b8fd13cccca22e9',
 107             'timestamp': 1677877071,
 108             'duration': 150.0,
 109             'upload_date': '20230303',
 110             'thumbnail': r're:https://bibeltv\.imgix\.net/[\w-]+\.jpg',
 111             'episode': 'Episode 1',
 112             'episode_number': 1,
 113             'view_count': int,
 114             'like_count': int,
 115         },
 116         'params': {
 117             'format': '6',
 118         },
 119     }]
 120
 121     def _real_extract(self, url):
 122         crn_id = self._match_id(url)
 123         video_data = traverse_obj(
 124             self._search_nextjs_data(self._download_webpage(url, crn_id), crn_id),
 125             ('props', 'pageProps', 'videoPageData', 'videos', 0, {dict}))
 126         if not video_data:
 127             raise ExtractorError('Missing video data.')
 128
 129         return self._extract_video_info(video_data)
 130
 131
 132 class BibelTVSeriesIE(BibelTVBaseIE):
 133     IE_DESC = 'BibelTV series playlist'
 134     _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/serien/(?P<id>\d+)[\w-]+'
 135     IE_NAME = 'bibeltv:series'
 136
 137     _TESTS = [{
 138         'url': 'https://www.bibeltv.de/mediathek/serien/333485-ein-wunder-fuer-jeden-tag',
 139         'playlist_mincount': 400,
 140         'info_dict': {
 141             'id': '333485',
 142             'title': 'Ein Wunder für jeden Tag',
 143             'description': 'Tägliche Kurzandacht mit Déborah Rosenkranz.',
 144         },
 145     }]
 146
 147     def _real_extract(self, url):
 148         crn_id = self._match_id(url)
 149         webpage = self._download_webpage(url, crn_id)
 150         nextjs_data = self._search_nextjs_data(webpage, crn_id)
 151         series_data = traverse_obj(nextjs_data, ('props', 'pageProps', 'seriePageData', {dict}))
 152         if not series_data:
 153             raise ExtractorError('Missing series data.')
 154
 155         return self.playlist_result(
 156             traverse_obj(series_data, ('videos', ..., {dict}, {self._extract_url_info})),
 157             crn_id, series_data.get('title'), clean_html(series_data.get('description')))
 158
 159
 160 class BibelTVLiveIE(BibelTVBaseIE):
 161     IE_DESC = 'BibelTV live program'
 162     _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/livestreams/(?P<id>[\w-]+)'
 163     IE_NAME = 'bibeltv:live'
 164
 165     _TESTS = [{
 166         'url': 'https://www.bibeltv.de/livestreams/bibeltv/',
 167         'info_dict': {
 168             'id': 'bibeltv',
 169             'ext': 'mp4',
 170             'title': 're:Bibel TV',
 171             'live_status': 'is_live',
 172             'thumbnail': 'https://streampreview.bibeltv.de/bibeltv.webp',
 173         },
 174         'params': {'skip_download': 'm3u8'},
 175     }, {
 176         'url': 'https://www.bibeltv.de/livestreams/impuls/',
 177         'only_matching': True,
 178     }]
 179
 180     def _real_extract(self, url):
 181         stream_id = self._match_id(url)
 182         webpage = self._download_webpage(url, stream_id)
 183         stream_data = self._search_json(
 184             r'\\"video\\":', webpage, 'bibeltvData', stream_id,
 185             transform_source=lambda jstring: js_to_json(jstring.replace('\\"', '"')))
 186
 187         formats, subtitles = self._extract_formats_and_subtitles(
 188             traverse_obj(stream_data, ('src', ...)), stream_id, is_live=True)
 189
 190         return {
 191             'id': stream_id,
 192             'title': stream_data.get('title'),
 193             'thumbnail': stream_data.get('poster'),
 194             'is_live': True,
 195             'formats': formats,
 196             'subtitles': subtitles,
 197         }