1 from .common
import InfoExtractor
2 from ..utils
import extract_attributes
, merge_dicts
, remove_end
5 class RheinMainTVIE(InfoExtractor
):
6 _VALID_URL
= r
'https?://(?:www\.)?rheinmaintv\.de/sendungen/(?:[\w-]+/)*(?P<video_id>(?P<display_id>[\w-]+)/vom-\d{2}\.\d{2}\.\d{4}(?:/\d+)?)'
8 'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/auf-dem-weg-zur-deutschen-meisterschaft/vom-07.11.2022/',
10 'id': 'auf-dem-weg-zur-deutschen-meisterschaft-vom-07.11.2022',
11 'ext': 'ismv', # ismv+isma will be merged into mp4
12 'alt_title': 'Auf dem Weg zur Deutschen Meisterschaft',
13 'title': 'Auf dem Weg zur Deutschen Meisterschaft',
14 'upload_date': '20221108',
16 'display_id': 'auf-dem-weg-zur-deutschen-meisterschaft',
17 'thumbnail': r
're:^https://.+\.jpg',
18 'description': 'md5:48c59b74192bc819a9b34af1d5ed1eb9',
19 'timestamp': 1667933057,
22 'params': {'skip_download': 'ism'},
24 'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/formationsgemeinschaft-rhein-main-bei-den-deutschen-meisterschaften/vom-14.11.2022/',
26 'id': 'formationsgemeinschaft-rhein-main-bei-den-deutschen-meisterschaften-vom-14.11.2022',
28 'title': 'Formationsgemeinschaft Rhein-Main bei den Deutschen Meisterschaften',
29 'timestamp': 1668526214,
30 'display_id': 'formationsgemeinschaft-rhein-main-bei-den-deutschen-meisterschaften',
31 'alt_title': 'Formationsgemeinschaft Rhein-Main bei den Deutschen Meisterschaften',
33 'thumbnail': r
're:^https://.+\.jpg',
35 'description': 'md5:9370ba29526984006c2cba1372e5c5a0',
36 'upload_date': '20221115',
38 'params': {'skip_download': 'ism'},
40 'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/casino-mainz-bei-den-deutschen-meisterschaften/vom-14.11.2022/',
42 'id': 'casino-mainz-bei-den-deutschen-meisterschaften-vom-14.11.2022',
44 'title': 'Casino Mainz bei den Deutschen Meisterschaften',
46 'timestamp': 1668527402,
47 'alt_title': 'Casino Mainz bei den Deutschen Meisterschaften',
48 'upload_date': '20221115',
49 'display_id': 'casino-mainz-bei-den-deutschen-meisterschaften',
51 'thumbnail': r
're:^https://.+\.jpg',
52 'description': 'md5:70fc1660eeba96da17199e5bdff4c0aa',
54 'params': {'skip_download': 'ism'},
56 'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/bricks4kids/vom-22.06.2022/',
57 'only_matching': True,
60 def _real_extract(self
, url
):
61 mobj
= self
._match
_valid
_url
(url
)
62 display_id
= mobj
.group('display_id')
63 video_id
= mobj
.group('video_id').replace('/', '-')
64 webpage
= self
._download
_webpage
(url
, video_id
)
66 source
, img
= self
._search
_regex
(r
'(?s)(?P<source><source[^>]*>)(?P<img><img[^>]*>)',
67 webpage
, 'video', group
=('source', 'img'))
68 source
= extract_attributes(source
)
69 img
= extract_attributes(img
)
71 raw_json_ld
= list(self
._yield
_json
_ld
(webpage
, video_id
))
72 json_ld
= self
._json
_ld
(raw_json_ld
, video_id
)
73 json_ld
.pop('url', None)
77 or next(json_ld
.get('embedUrl') for json_ld
in raw_json_ld
if json_ld
.get('@type') == 'VideoObject')
79 formats
, subtitles
= self
._extract
_ism
_formats
_and
_subtitles
(ism_manifest_url
, video_id
)
83 'display_id': display_id
,
85 self
._html
_search
_regex
(r
'<h1><span class="title">([^<]*)</span>',
86 webpage
, 'headline', default
=None)
87 or img
.get('title') or json_ld
.get('title') or self
._og
_search
_title
(webpage
)
88 or remove_end(self
._html
_extract
_title
(webpage
), ' -'),
89 'alt_title': img
.get('alt'),
90 'description': json_ld
.get('description') or self
._og
_search
_description
(webpage
),
92 'subtitles': subtitles
,
93 'thumbnails': [{'url': img
['src']}] if 'src' in img
else json_ld
.get('thumbnails'),