import json
import re
import urllib.parse

from .common import InfoExtractor
from .ninecninemedia import NineCNineMediaIE
from ..utils import extract_attributes, orderedSet
from ..utils.traversal import find_element, traverse_obj
class CTVNewsIE(InfoExtractor):
    """Extractor for ctvnews.ca clips and clip playlists.

    Single clips are handed off to ``NineCNineMediaIE``; playlist/article
    pages are scraped for clip ids and returned as a playlist of such
    hand-offs.
    """

    # Any ctvnews.ca host, with an optional single-label subdomain.
    _BASE_REGEX = r'https?://(?:[^.]+\.)?ctvnews\.ca/'
    # Clip ids: five or more digits.
    _VIDEO_ID_RE = r'(?P<id>\d{5,})'
    # Playlist/bin/article ids: one digit, a dot, then five or more digits.
    _PLAYLIST_ID_RE = r'(?P<id>\d\.\d{5,})'
    # NOTE(review): the `_VALID_URL = [` opener and its closing bracket were
    # missing from the damaged copy this was restored from; only the five
    # patterns survived. Confirm the attribute name/shape against upstream.
    _VALID_URL = [
        rf'{_BASE_REGEX}video/c{_VIDEO_ID_RE}',
        rf'{_BASE_REGEX}video(?:-gallery)?/?\?clipId={_VIDEO_ID_RE}',
        rf'{_BASE_REGEX}video/?\?(?:playlist|bin)Id={_PLAYLIST_ID_RE}',
        rf'{_BASE_REGEX}(?!video/)[^?#]*?{_PLAYLIST_ID_RE}/?(?:$|[?#])',
        rf'{_BASE_REGEX}(?!video/)[^?#]+\?binId={_PLAYLIST_ID_RE}',
    ]
    # NOTE(review): the list/dict/`info_dict` wrappers below are reconstructed.
    # The damaged copy had also lost individual lines inside every test case
    # (presumably including 'id' and 'ext'); those were NOT re-invented here
    # and must be restored from upstream before relying on these tests.
    _TESTS = [{
        'url': 'http://www.ctvnews.ca/video?clipId=901995',
        'md5': 'b608f466c7fa24b9666c6439d766ab7e',
        'info_dict': {
            'title': 'Extended: \'That person cannot be me\' Johnson says',
            'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285',
            'timestamp': 1467286284,
            'upload_date': '20160630',
            'series': 'CTV News National | Archive | Stories 2',
            'thumbnail': r're:https?://.*\.jpg$',
        },
    }, {
        'url': 'https://barrie.ctvnews.ca/video/c3030933-here_s-what_s-making-news-for-nov--15?binId=1272429',
        'md5': '8b8c2b33c5c1803e3c26bc74ff8694d5',
        'info_dict': {
            'title': 'Here’s what’s making news for Nov. 15',
            'description': 'Here are the top stories we’re working on for CTV News at 11 for Nov. 15',
            'thumbnail': 'http://images2.9c9media.com/image_asset/2021_2_22_a602e68e-1514-410e-a67a-e1f7cccbacab_png_2000x1125.jpg',
            'series': 'CTV News Barrie',
            'upload_date': '20241116',
            'timestamp': 1731722452,
        },
    }, {
        'url': 'http://www.ctvnews.ca/video?playlistId=1.2966224',
        'playlist_mincount': 19,
    }, {
        'url': 'http://www.ctvnews.ca/video?binId=1.2876780',
        'playlist_mincount': 100,
    }, {
        'url': 'https://www.ctvnews.ca/it-s-been-23-years-since-toronto-called-in-the-army-after-a-major-snowstorm-1.5736957',
        'playlist_mincount': 6,
    }, {
        'url': 'https://www.ctvnews.ca/business/respondents-to-bank-of-canada-questionnaire-largely-oppose-creating-a-digital-loonie-1.6665797',
        'md5': '24bc4b88cdc17d8c3fc01dfc228ab72c',
        'info_dict': {
            'series': 'From CTV News Channel',
            'description': 'md5:796a985a23cacc7e1e2fafefd94afd0a',
            'title': 'Bank of Canada asks public about digital currency',
            'upload_date': '20230526',
            'season_number': 2023,
            'thumbnail': 'http://images2.9c9media.com/image_asset/2019_3_28_35f5afc3-10f6-4d92-b194-8b9a86f55c6a_png_1920x1080.jpg',
            'timestamp': 1685105157,
        },
    }, {
        'url': 'https://stox.ctvnews.ca/video-gallery?clipId=582589',
        'md5': '135cc592df607d29dddc931f1b756ae2',
        'info_dict': {
            'timestamp': 1427906183,
            'thumbnail': 'http://images2.9c9media.com/image_asset/2019_3_28_35f5afc3-10f6-4d92-b194-8b9a86f55c6a_png_1920x1080.jpg',
            'series': 'CTV News Stox',
            'description': 'CTV original footage of the rise and fall of the Berlin Wall.',
            'title': 'Berlin Wall',
            'season_id': '63817',
            'season': 'Season 0',
            'upload_date': '20150401',
        },
    }, {
        'url': 'https://ottawa.ctvnews.ca/features/regional-contact/regional-contact-archive?binId=1.1164587#3023759',
        'md5': 'a14c0603557decc6531260791c23cc5e',
        'info_dict': {
            'season_number': 2024,
            'timestamp': 1731798000,
            'episode': 'Episode 125',
            'description': 'CTV News Ottawa at Six',
            'duration': 2712.076,
            'episode_number': 125,
            'upload_date': '20241116',
            'title': 'CTV News Ottawa at Six for Saturday, November 16, 2024',
            'thumbnail': 'http://images2.9c9media.com/image_asset/2019_3_28_35f5afc3-10f6-4d92-b194-8b9a86f55c6a_png_1920x1080.jpg',
            'series': 'CTV News Ottawa at Six',
            'season_id': '92667',
        },
    }, {
        'url': 'http://www.ctvnews.ca/1.810401',
        'only_matching': True,
    }, {
        'url': 'http://www.ctvnews.ca/canadiens-send-p-k-subban-to-nashville-in-blockbuster-trade-1.2967231',
        'only_matching': True,
    }, {
        'url': 'http://vancouverisland.ctvnews.ca/video?clipId=761241',
        'only_matching': True,
    }]

    def _ninecninemedia_url_result(self, clip_id):
        """Return a URL result delegating *clip_id* to ``NineCNineMediaIE``.

        The target is the internal ``9c9media:ctvnews_web:<clip_id>`` URL.
        """
        return self.url_result(f'9c9media:ctvnews_web:{clip_id}', NineCNineMediaIE, clip_id)

    def _real_extract(self, url):
        """Resolve *url* to a single clip hand-off or a playlist of them.

        Returns the result of ``_ninecninemedia_url_result`` when the id is a
        clip id, otherwise ``playlist_result(entries, page_id)``.
        """
        page_id = self._match_id(url)

        # A URL fragment that is entirely a clip id (e.g. '...#3023759')
        # overrides the id matched from the path/query.
        if mobj := re.fullmatch(self._VIDEO_ID_RE, urllib.parse.urlparse(url).fragment):
            page_id = mobj.group('id')

        # Clip ids need no page download; delegate straight away.
        if re.fullmatch(self._VIDEO_ID_RE, page_id):
            return self._ninecninemedia_url_result(page_id)

        # First attempt: the AJAX page layout, scraped for `clip.id = N;`.
        webpage = self._download_webpage(f'https://www.ctvnews.ca/{page_id}', page_id, query={
            'ot': 'example.AjaxPageLayout.ot',
            'maxItemsPerPage': 1000000,
        })
        entries = [self._ninecninemedia_url_result(clip_id)
                   for clip_id in orderedSet(re.findall(r'clip\.id\s*=\s*(\d+);', webpage))]
        # NOTE(review): the `if not entries:` guard, the `else:` and the
        # second `entries = [` were missing from the damaged copy and are
        # reconstructed from the surrounding statements; confirm against
        # upstream.
        if not entries:
            # Fallback: fetch the page the user actually requested.
            webpage = self._download_webpage(url, page_id)
            if 'getAuthStates("' in webpage:
                # Comma-separated clip ids passed to getAuthStates("...").
                entries = [self._ninecninemedia_url_result(clip_id) for clip_id in
                           self._search_regex(r'getAuthStates\("([\d+,]+)"', webpage, 'clip ids').split(',')]
            else:
                # JSON in the `axis-ids` attribute of <jasper-player-container>;
                # every string `axisId` found in it is taken as a clip id.
                entries = [
                    self._ninecninemedia_url_result(clip_id) for clip_id in
                    traverse_obj(webpage, (
                        {find_element(tag='jasper-player-container', html=True)},
                        {extract_attributes}, 'axis-ids', {json.loads}, ..., 'axisId', {str}))
                ]

        return self.playlist_result(entries, page_id)