3 from .common
import InfoExtractor
12 class MediaWorksNZVODIE(InfoExtractor
):
# URL prefix of the published-asset API endpoint; also interpolated into the
# embed-detection regex in _extract_embed_urls.
_VALID_URL_BASE_RE = r'https?://vodupload-api\.mediaworks\.nz/library/asset/published/'
# Asset identifier (letters, digits, hyphens), captured as the named group "id".
_VALID_URL_ID_RE = r'(?P<id>[A-Za-z0-9-]+)'
# Complete URL pattern: the base prefix immediately followed by the asset id.
_VALID_URL = rf'{_VALID_URL_BASE_RE}{_VALID_URL_ID_RE}'
17 'url': 'https://vodupload-api.mediaworks.nz/library/asset/published/VID00359',
21 'title': 'GRG Jacinda Ardern safe drug testing 1920x1080',
22 'description': 'md5:d4d7dc366742e86d8130b257dcb520ba',
24 'timestamp': 1604268608,
25 'upload_date': '20201101',
26 'thumbnail': r
're:^https?://.*\.jpg$',
27 'channel': 'George FM'
30 # has audio-only format
31 'url': 'https://vodupload-api.mediaworks.nz/library/asset/published/VID02627',
35 'title': 'Tova O\'Brien meets Ukraine President Volodymyr Zelensky',
36 'channel': 'Today FM',
37 'description': 'Watch in full the much anticipated interview of Volodymyr Zelensky',
39 'thumbnail': r
're:^https?://.*\.jpg$',
40 'upload_date': '20220822',
41 'timestamp': 1661152289,
43 'params': {'format': 'ba[ext=mp3]'}
47 'url': 'https://www.rova.nz/home/podcasts/socrates-walks-into-a-bar/the-trolley-problem---episode-1.html',
51 'title': 'The Trolley Problem',
54 'timestamp': 1658356489,
55 'thumbnail': r
're:^https?://.*\.jpg$',
56 'description': 'Socrates Walks Into A Bar Podcast Episode 1',
57 'upload_date': '20220720',
@classmethod
def _extract_embed_urls(cls, url, webpage):
    """Yield an asset API URL for every matching player element in *webpage*.

    A match is a ``div`` tag whose ``id`` attribute begins with
    ``Player-Attributes-JWID`` and which, later in the same tag, carries a
    ``data-request-url`` attribute equal to the API base URL and a
    ``data-asset-id`` attribute holding the asset id.

    Args:
        url: Address of the page being scanned; not used by this method.
        webpage: HTML text to search.

    Yields:
        str: ``https://vodupload-api.mediaworks.nz/library/asset/published/``
        followed by the captured asset id, one per match.
    """
    # Verbose-mode pattern: the line breaks and indentation inside the
    # literal are ignored by the regex engine.
    embed_re = rf'''(?x)<div\s+\bid=["']Player-Attributes-JWID[^>]+\b
        data-request-url=["']{cls._VALID_URL_BASE_RE}["'][^>]+\b
        data-asset-id=["']{cls._VALID_URL_ID_RE}["']'''
    for mobj in re.finditer(embed_re, webpage):
        yield f'https://vodupload-api.mediaworks.nz/library/asset/published/{mobj.group("id")}'
def _real_extract(self, url):
    """Download the asset JSON at *url* and build the info dict for it.

    Steps, in order:
      1. Take the video id from *url* and read the ``asset`` object of the
         JSON response.
      2. Call ``self.report_drm`` when the asset's ``drm`` value is anything
         other than ``'NonDRM'`` or absent.
      3. Emit a warning when a ``type`` is present and is not ``'video'``.
      4. Extract formats and subtitles from the ``streamingUrl`` playlist.
      5. If an audio playlist URL is present, add its formats as audio-only.

    Args:
        url: Asset API URL.

    Returns:
        dict: id, title, description, duration, timestamp, channel,
        thumbnails, formats and subtitles for the asset.

    Raises:
        KeyError: if the response has no ``asset`` key or the asset has no
            ``streamingUrl`` key (both are read with plain subscripting).
    """
    video_id = self._match_id(url)
    asset = self._download_json(url, video_id)['asset']

    if asset.get('drm') not in ('NonDRM', None):
        self.report_drm(video_id)

    content_type = asset.get('type')
    if content_type and content_type != 'video':
        self.report_warning(
            f'Unknown content type: {content_type}' + bug_reports_message(), video_id)

    formats, subtitles = self._extract_m3u8_formats_and_subtitles(
        asset['streamingUrl'], video_id)

    # NOTE(review): 'palyoutPathAudio' looks misspelled, but it is tried
    # alongside 'playoutpathaudio', so it is presumably the key the API
    # actually returns - confirm against a live response before "fixing" it.
    audio_streaming_url = traverse_obj(
        asset, 'palyoutPathAudio', 'playoutpathaudio', expected_type=str)
    if audio_streaming_url:
        # fatal=False: presumably a failed audio playlist yields no formats
        # instead of aborting the whole extraction - TODO confirm.
        audio_formats = self._extract_m3u8_formats(
            audio_streaming_url, video_id, fatal=False, ext='mp3')
        for audio_format in audio_formats:
            # all the audio streams appear to be aac
            audio_format.setdefault('vcodec', 'none')
            audio_format.setdefault('acodec', 'aac')
            formats.append(audio_format)

    return {
        'id': video_id,
        'title': asset.get('title'),
        'description': asset.get('description'),
        'duration': float_or_none(asset.get('duration')),
        'timestamp': unified_timestamp(asset.get('dateadded')),
        'channel': asset.get('brand'),
        'thumbnails': [{'url': thumbnail_url} for thumbnail_url in asset.get('thumbnails') or []],
        # Without this entry the format list assembled above is discarded.
        'formats': formats,
        'subtitles': subtitles,
    }