[ie/learningonscreen] Add extractor (#10590)
[yt-dlp3.git] / yt_dlp / extractor / learningonscreen.py
blobdcf83144c83a41c2c94db01f84ea001ec72db7e1
1 import functools
2 import re
4 from .common import InfoExtractor
5 from ..utils import (
6 ExtractorError,
7 clean_html,
8 extract_attributes,
9 get_element_by_class,
10 get_element_html_by_id,
11 join_nonempty,
12 parse_duration,
13 unified_timestamp,
15 from ..utils.traversal import traverse_obj
class LearningOnScreenIE(InfoExtractor):
    """Extractor for programme pages under learningonscreen.ac.uk/ondemand."""

    _VALID_URL = r'https?://learningonscreen\.ac\.uk/ondemand/index\.php/prog/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://learningonscreen.ac.uk/ondemand/index.php/prog/005D81B2?bcast=22757013',
        'info_dict': {
            'id': '005D81B2',
            'ext': 'mp4',
            'title': 'Planet Earth',
            'duration': 3600.0,
            'timestamp': 1164567600.0,
            'upload_date': '20061126',
            'thumbnail': 'https://stream.learningonscreen.ac.uk/trilt-cover-images/005D81B2-Planet-Earth-2006-11-26T190000Z-BBC4.jpg',
        },
    }]

    def _real_initialize(self):
        """Request cookie authentication when the site's session cookie is absent."""
        site_cookies = self._get_cookies('https://learningonscreen.ac.uk/')
        if site_cookies.get('PHPSESSID-BOB-LIVE'):
            return
        self.raise_login_required(
            'Use --cookies for authentication. See '
            ' https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp '
            'for how to manually pass cookies', method=None)

    def _real_extract(self, url):
        """Build the info dict (or a playlist of parts) for one programme page.

        Title, broadcast timestamp and running time are scraped from the
        ``programme-details`` element of the downloaded page; the media itself
        comes from the page's HTML5 media tags.

        Raises:
            ExtractorError: if the page yields no HTML5 media entries.
        """
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        # Selector helpers used as traversal steps below.
        find_details = functools.partial(get_element_html_by_id, 'programme-details')
        find_heading = functools.partial(re.search, r'<h2>([^<]+)</h2>')
        find_broadcast_date = functools.partial(get_element_by_class, 'broadcast-date')
        leading_text = functools.partial(re.match, r'([^<]+)')
        find_running_time = functools.partial(get_element_by_class, 'prog-running-time')
        find_playlist_button = functools.partial(get_element_html_by_id, 'add-to-existing-playlist')

        metadata = traverse_obj(page, ({find_details}, {
            'title': ({find_heading}, 1, {clean_html}),
            'timestamp': ({find_broadcast_date}, {leading_text}, 1, {unified_timestamp}),
            'duration': ({find_running_time}, {clean_html}, {parse_duration}),
        }))

        # The title is handled separately from the rest of the metadata because
        # playlist parts get a numbered variant of it.
        title = metadata.pop('title', None)
        if not title:
            # Fall back to the record title carried by the playlist button's attributes.
            title = traverse_obj(page, (
                {find_playlist_button}, {extract_attributes}, 'data-record-title', {clean_html}))

        request_headers = {
            'Origin': 'https://learningonscreen.ac.uk',
            'Referer': 'https://learningonscreen.ac.uk/',
        }
        media_entries = self._parse_html5_media_entries(
            'https://stream.learningonscreen.ac.uk', page, video_id,
            m3u8_id='hls', mpd_id='dash', _headers=request_headers)
        if not media_entries:
            raise ExtractorError('No video found')

        if len(media_entries) == 1:
            return {
                **media_entries[0],
                **metadata,
                'id': video_id,
                'title': title,
            }

        # Several media elements: return a playlist. The running time is passed
        # to the playlist result and is not copied onto the individual parts.
        duration = metadata.pop('duration', None)
        for part_number, part in enumerate(media_entries, start=1):
            part.update({
                **metadata,
                'id': join_nonempty(video_id, part_number),
                'title': join_nonempty(title, part_number),
            })
        return self.playlist_result(media_entries, video_id, title, duration=duration)