[ie/dplay] Fix extractors (#10471)
[yt-dlp3.git] / yt_dlp / extractor / canalc2.py
blobc725545fa2f059a3f687cdeb45853d7bb988d7f2
1 import re
3 from .common import InfoExtractor
4 from ..utils import parse_duration
class Canalc2IE(InfoExtractor):
    """Extractor for canalc2.tv video pages.

    Matches both the current ``canalc2.tv/video/<id>`` pages and the legacy
    ``archives-canalc2.u-strasbg.fr/video.asp?...idVideo=<id>`` links; the
    numeric id is captured as the ``id`` group in either case.
    """

    IE_NAME = 'canalc2.tv'
    _VALID_URL = r'https?://(?:(?:www\.)?canalc2\.tv/video/|archives-canalc2\.u-strasbg\.fr/video\.asp\?.*\bidVideo=)(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://www.canalc2.tv/video/12163',
        'md5': '060158428b650f896c542dfbb3d6487f',
        'info_dict': {
            'id': '12163',
            'ext': 'mp4',
            'title': 'Terrasses du Numérique',
            'duration': 122,
        },
    }, {
        'url': 'http://archives-canalc2.u-strasbg.fr/video.asp?idVideo=11427&voir=oui',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video referenced by ``url``.

        The canonical ``http://www.canalc2.tv/video/<id>`` page is always the
        one downloaded, whichever URL form matched. Formats come from the
        ``file = "..."`` assignments found in that page; when none are usable,
        the page's HTML5 media entries are used instead.

        Returns a dict holding ``id``, ``title``, ``duration`` (``None`` when
        the duration element is absent or unparsable) and the formats.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            f'http://www.canalc2.tv/video/{video_id}', video_id)

        title = self._html_search_regex(
            r'(?s)class="[^"]*col_description[^"]*">.*?<h3>(.+?)</h3>',
            webpage, 'title')

        formats = []
        # Group 1 is only the opening quote (needed for the \1 backreference);
        # group 2 is the media URL itself.
        for _, video_url in re.findall(r'file\s*=\s*(["\'])(.+?)\1', webpage):
            if not video_url.startswith('rtmp://'):
                formats.append({
                    'url': video_url,
                    'format_id': 'http',
                })
                continue

            # Split the RTMP URL into connection URL (server + app), app name
            # and the "mp4:..." play path.
            rtmp = re.search(
                r'^(?P<url>rtmp://[^/]+/(?P<app>.+/))(?P<play_path>mp4:.+)$', video_url)
            if rtmp is None:
                # An rtmp:// URL without an "mp4:" play path cannot be split;
                # skip it rather than fail with AttributeError on rtmp.group().
                continue
            formats.append({
                'url': rtmp.group('url'),
                'format_id': 'rtmp',
                'ext': 'flv',
                'app': rtmp.group('app'),
                'play_path': rtmp.group('play_path'),
                'page_url': url,
            })

        if formats:
            info = {
                'formats': formats,
            }
        else:
            # The third argument is the video id (it was previously the page
            # URL). Guard the lookup so a page without any HTML5 media yields
            # an empty format list instead of an IndexError.
            entries = self._parse_html5_media_entries(url, webpage, video_id)
            info = entries[0] if entries else {'formats': []}

        info.update({
            'id': video_id,
            'title': title,
            # fatal=False: a missing duration element is tolerated and
            # results in parse_duration receiving no value.
            'duration': parse_duration(self._search_regex(
                r'id=["\']video_duree["\'][^>]*>([^<]+)',
                webpage, 'duration', fatal=False)),
        })
        return info