Release 2024.12.23
[yt-dlp.git] / yt_dlp / extractor / odkmedia.py
blob766cb941bb338231b39ecc180e2ca0f651e57858
1 import json
3 from .common import InfoExtractor
4 from ..networking.exceptions import HTTPError
5 from ..utils import (
6 ExtractorError,
7 GeoRestrictedError,
8 float_or_none,
9 traverse_obj,
10 try_call,
class OnDemandChinaEpisodeIE(InfoExtractor):
    # Extractor for single-episode watch pages on www.ondemandchina.com.
    # The URL yields the programme slug ("series"), the display id ("ep-N")
    # and the numeric episode number ("ep").
    _VALID_URL = r'https?://www\.ondemandchina\.com/\w+/watch/(?P<series>[\w-]+)/(?P<id>ep-(?P<ep>\d+))'
    _TESTS = [{
        'url': 'https://www.ondemandchina.com/en/watch/together-against-covid-19/ep-1',
        'info_dict': {
            'id': '264394',
            'ext': 'mp4',
            'duration': 3256.88,
            'title': 'EP 1 The Calling',
            'alt_title': '第1集 令出如山',
            'thumbnail': 'https://d2y2efdi5wgkcl.cloudfront.net/fit-in/256x256/media-io/2020/9/11/image.d9816e81.jpg',
            'description': '疫情严峻,党政军民学、东西南北中协同应考',
            'tags': ['Social Humanities', 'Documentary', 'Medical', 'Social'],
        },
    }]

    # GraphQL document posted to the odkmedia endpoint. It looks an episode up
    # by programme slug + episode number and selects every field that
    # _real_extract() reads from the response.
    _QUERY = '''
    query Episode($programSlug: String!, $episodeNumber: Int!) {
      episode(
        programSlug: $programSlug
        episodeNumber: $episodeNumber
        kind: "series"
        part: null
      ) {
        id
        title
        titleEn
        titleKo
        titleZhHans
        titleZhHant
        synopsis
        synopsisEn
        synopsisKo
        synopsisZhHans
        synopsisZhHant
        videoDuration
        images {
          thumbnail
        }
      }
    }'''

    def _real_extract(self, url):
        """Build the info dict for one episode page.

        Steps: download the watch page (used only for metadata fallbacks),
        query the GraphQL API for the episode record, ask the playback API
        for the stream sources, then collect HLS formats and subtitles.

        Raises GeoRestrictedError when the playback API answers with an HTTP
        error whose JSON body carries a 'detail' message; any other download
        failure is re-raised unchanged.
        """
        program_slug, display_id, ep_number = self._match_valid_url(url).group('series', 'id', 'ep')
        webpage = self._download_webpage(url, display_id)

        video_info = self._download_json(
            'https://odc-graphql.odkmedia.io/graphql', display_id,
            headers={'Content-type': 'application/json'},
            data=json.dumps({
                'operationName': 'Episode',
                'query': self._QUERY,
                'variables': {
                    'programSlug': program_slug,
                    'episodeNumber': int(ep_number),
                },
            }).encode())['data']['episode']

        try:
            source_json = self._download_json(
                f'https://odkmedia.io/odc/api/v2/playback/{video_info["id"]}/', display_id,
                headers={'Authorization': '', 'service-name': 'odc'})
        except ExtractorError as e:
            # Only HTTP errors can carry the API's explanation; anything else
            # must propagate, otherwise source_json would be unbound below.
            if not isinstance(e.cause, HTTPError):
                raise
            # Parse the error body leniently so that a non-JSON body or a
            # missing 'detail' key does not mask the original HTTP error.
            detail = traverse_obj(
                self._parse_json(e.cause.response.read(), display_id, fatal=False),
                'detail')
            if not detail:
                raise
            raise GeoRestrictedError(detail)

        formats, subtitles = [], {}
        for source in traverse_obj(source_json, ('sources', ...)):
            # Skip malformed entries instead of failing on .get().
            if not isinstance(source, dict):
                continue
            if source.get('type') == 'hls':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(source.get('url'), display_id)
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)
            else:
                self.report_warning(f'Unsupported format {source.get("type")}', display_id)

        return {
            'id': str(video_info['id']),
            # Scaled down by 1000 -- presumably the API reports milliseconds.
            'duration': float_or_none(video_info.get('videoDuration'), 1000),
            'thumbnail': (traverse_obj(video_info, ('images', 'thumbnail'))
                          or self._html_search_meta(['og:image', 'twitter:image'], webpage)),
            'title': (traverse_obj(video_info, 'title', 'titleEn')
                      or self._html_search_meta(['og:title', 'twitter:title'], webpage)
                      or self._html_extract_title(webpage)),
            'alt_title': traverse_obj(video_info, 'titleKo', 'titleZhHans', 'titleZhHant'),
            # Localised synopses first, then the generic 'synopsis' field
            # requested by _QUERY, then the page's meta tags.
            'description': (traverse_obj(
                video_info, 'synopsisEn', 'synopsisKo', 'synopsisZhHans', 'synopsisZhHant', 'synopsis')
                or self._html_search_meta(['og:description', 'twitter:description', 'description'], webpage)),
            'formats': formats,
            'subtitles': subtitles,
            # try_call guards the .split() on the keywords meta lookup.
            'tags': try_call(lambda: self._html_search_meta('keywords', webpage).split(', ')),
        }