[ie/soundcloud] Various fixes (#11820)
[yt-dlp.git] / yt_dlp / extractor / nzz.py
blob 047c4e1ac9ffa45f27d09859ca20db7e1837f04c
1 import re
3 from .common import InfoExtractor
class NZZIE(InfoExtractor):
    """Extractor for nzz.ch pages whose URL ends in ``-ld.<digits>``.

    A matching page is returned as a playlist; its entries are built from
    the ``settings`` objects found in the page's JW Player inline scripts.
    """

    # The digits following "-ld." are captured as the page id.
    _VALID_URL = r'https?://(?:www\.)?nzz\.ch/(?:[^/]+/)*[^/?#]+-ld\.(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.nzz.ch/zuerich/gymizyte/gymizyte-schreiben-schueler-heute-noch-diktate-ld.9153',
        'info_dict': {
            'id': '9153',
        },
        'playlist_mincount': 6,
    }, {
        'url': 'https://www.nzz.ch/video/nzz-standpunkte/cvp-auf-der-suche-nach-dem-mass-der-mitte-ld.1368112',
        'info_dict': {
            'id': '1368112',
        },
        'playlist_count': 1,
    }]

    def _entries(self, webpage, page_id):
        """Yield the parsed JW Player result for each player script in *webpage*.

        Args:
            webpage: HTML source of the page, as a string.
            page_id: Page id, passed to the helpers as the video id.

        Yields:
            Each truthy value returned by ``_parse_jwplayer_data``.
        """
        # Only <script> tags whose data-hid attribute starts with
        # "jw-video-jw" are considered; the script body is captured.
        for script in re.findall(r'(?s)<script[^>]* data-hid="jw-video-jw[^>]+>(.+?)</script>', webpage):
            # The lookup is non-fatal: a script without a usable
            # `var settings = {...}` assignment must not abort the page.
            settings = self._search_json(r'var\s+settings\s*=[^{]*', script, 'settings', page_id, fatal=False)
            if not settings:
                # Nothing was extracted from this script, so there is no
                # player data to parse; move on to the next one.
                continue
            if entry := self._parse_jwplayer_data(settings, page_id):
                yield entry

    def _real_extract(self, url):
        """Download the page at *url* and return its videos as a playlist.

        Args:
            url: Page URL matching ``_VALID_URL``.

        Returns:
            The value of ``playlist_result`` for the page's entries, using
            the page id as the playlist id.
        """
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)

        return self.playlist_result(self._entries(webpage, page_id), page_id)