[ie/dplay] Fix extractors (#10471)
[yt-dlp3.git] / yt_dlp / extractor / cultureunplugged.py
blob8e6579c3552ab9faa95cf7204dd28302e5885566
1 import time
3 from .common import InfoExtractor
4 from ..networking import HEADRequest
5 from ..utils import int_or_none
class CultureUnpluggedIE(InfoExtractor):
    """Extractor for documentary playback pages on cultureunplugged.com."""

    _VALID_URL = r'https?://(?:www\.)?cultureunplugged\.com/documentary/watch-online/play/(?P<id>\d+)(?:/(?P<display_id>[^/]+))?'
    _TESTS = [{
        'url': 'http://www.cultureunplugged.com/documentary/watch-online/play/53662/The-Next--Best-West',
        'md5': 'ac6c093b089f7d05e79934dcb3d228fc',
        'info_dict': {
            'id': '53662',
            'display_id': 'The-Next--Best-West',
            'ext': 'mp4',
            'title': 'The Next, Best West',
            'description': 'md5:0423cd00833dea1519cf014e9d0903b1',
            'thumbnail': r're:^https?://.*\.jpg$',
            'creator': 'Coldstream Creative',
            'duration': 2203,
            'view_count': int,
        },
    }, {
        'url': 'http://www.cultureunplugged.com/documentary/watch-online/play/53662',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the documentary addressed by *url*.

        The numeric id comes from the URL; the optional slug is used as the
        display id and falls back to the numeric id when the URL has none.
        ``url`` and ``title`` are mandatory keys of the movie JSON, all other
        fields are optional.
        """
        match = self._match_valid_url(url)
        video_id, slug = match.group('id', 'display_id')
        display_id = slug or video_id

        # The site only serves valid movie JSON once a PHPSESSID cookie is
        # set; a HEAD request to setClientTimezone.php is what issues it.
        utc_offset_hours = -(time.timezone / 3600)
        timezone_request = HEADRequest(
            'http://www.cultureunplugged.com/setClientTimezone.php?timeOffset=%d' % utc_offset_hours)
        self._request_webpage(timezone_request, display_id)

        movie_data = self._download_json(
            f'http://www.cultureunplugged.com/movie-data/cu-{video_id}.json', display_id)

        # Mandatory fields are looked up first so that a missing one fails
        # before any optional field is read.
        info = {
            'id': video_id,
            'display_id': display_id,
            'url': movie_data['url'],
            'title': movie_data['title'],
            'description': movie_data.get('synopsis'),
            'creator': movie_data.get('producer'),
            'duration': int_or_none(movie_data.get('duration')),
            'view_count': int_or_none(movie_data.get('views')),
        }

        # Position in the tuple doubles as the preference, so 'large' is
        # ranked above 'small'. Sizes without a thumbnail URL are skipped.
        thumbnails = []
        for preference, size in enumerate(('small', 'large')):
            thumb_url = movie_data.get(f'{size}_thumb')
            if not thumb_url:
                continue
            thumbnails.append({
                'url': thumb_url,
                'id': size,
                'preference': preference,
            })
        info['thumbnails'] = thumbnails

        return info