[ie/youtube] Add age-gate workaround for some embeddable videos (#11821)
[yt-dlp.git] / yt_dlp / extractor / boxcast.py
blobefa66994aaadd2c8b24ff1967a3159c7b204fd18
1 from .common import InfoExtractor
2 from ..utils import js_to_json, traverse_obj, unified_timestamp
class BoxCastVideoIE(InfoExtractor):
    """Extractor for BoxCast broadcasts on boxcast.tv (embed, channel and video-portal URLs)."""

    _VALID_URL = r'''(?x)
        https?://boxcast\.tv/(?:
            view-embed/|
            channel/\w+\?(?:[^#]+&)?b=|
            video-portal/(?:\w+/){2}
        )(?P<id>[\w-]+)'''
    _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>https?://boxcast\.tv/view-embed/[\w-]+)']
    _TESTS = [{
        'url': 'https://boxcast.tv/view-embed/in-the-midst-of-darkness-light-prevails-an-interdisciplinary-symposium-ozmq5eclj50ujl4bmpwx',
        'info_dict': {
            'id': 'da1eqqgkacngd5djlqld',
            'ext': 'mp4',
            'thumbnail': r're:https?://uploads\.boxcast\.com/(?:[\w+-]+/){3}.+\.png$',
            'title': 'In the Midst of Darkness Light Prevails: An Interdisciplinary Symposium',
            'release_timestamp': 1670686812,
            'release_date': '20221210',
            'uploader_id': 're8w0v8hohhvpqtbskpe',
            'uploader': 'Children\'s Health Defense',
        },
    }, {
        'url': 'https://boxcast.tv/video-portal/vctwevwntun3o0ikq7af/rvyblnn0fxbfjx5nwxhl/otbpltj2kzkveo2qz3ad',
        'info_dict': {
            'id': 'otbpltj2kzkveo2qz3ad',
            'ext': 'mp4',
            'uploader_id': 'vctwevwntun3o0ikq7af',
            'uploader': 'Legacy Christian Church',
            'title': 'The Quest | 1: Beginner\'s Bay | Jamie Schools',
            # Dots in the host name are escaped so they only match literal dots.
            'thumbnail': r're:https?://uploads\.boxcast\.com/(?:[\w-]+/){3}.+\.jpg',
        },
    }, {
        'url': 'https://boxcast.tv/channel/z03fqwaeaby5lnaawox2?b=ssihlw5gvfij2by8tkev',
        'info_dict': {
            'id': 'ssihlw5gvfij2by8tkev',
            'ext': 'mp4',
            'thumbnail': r're:https?://uploads\.boxcast\.com/(?:[\w-]+/){3}.+\.jpg$',
            'release_date': '20230101',
            'uploader_id': 'ds25vaazhlu4ygcvffid',
            'release_timestamp': 1672543201,
            'uploader': 'Lighthouse Ministries International - Beltsville, Maryland',
            'description': 'md5:ac23e3d01b0b0be592e8f7fe0ec3a340',
            'title': 'New Year\'s Eve CROSSOVER Service at LHMI | December 31, 2022',
        },
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://childrenshealthdefense.eu/live-stream/',
        'info_dict': {
            'id': 'da1eqqgkacngd5djlqld',
            'ext': 'mp4',
            'thumbnail': r're:https?://uploads\.boxcast\.com/(?:[\w+-]+/){3}.+\.png$',
            'title': 'In the Midst of Darkness Light Prevails: An Interdisciplinary Symposium',
            'release_timestamp': 1670686812,
            'release_date': '20221210',
            'uploader_id': 're8w0v8hohhvpqtbskpe',
            'uploader': 'Children\'s Health Defense',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for a single BoxCast broadcast URL.

        Broadcast and view metadata are taken from the ``BOXCAST_PRELOAD``
        object embedded in the page when present, otherwise from the
        ``api.boxcast.com`` endpoints. Formats are only extracted when the
        view reports ``status == 'recorded'`` and provides a playlist URL.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # default={} makes a page without the preload object non-fatal;
        # the API fallbacks below are used in that case.
        webpage_json_data = self._search_json(
            r'var\s*BOXCAST_PRELOAD\s*=', webpage, 'broadcast data', display_id,
            transform_source=js_to_json, default={})

        # Ref: https://support.boxcast.com/en/articles/4235158-build-a-custom-viewer-experience-with-boxcast-api
        broadcast_json_data = (
            traverse_obj(webpage_json_data, ('broadcast', 'data'))
            or self._download_json(f'https://api.boxcast.com/broadcasts/{display_id}', display_id))
        # The view request is best-effort (fatal=False); fall back to an empty
        # dict so the lookups below stay safe.
        view_json_data = (
            traverse_obj(webpage_json_data, ('view', 'data'))
            or self._download_json(f'https://api.boxcast.com/broadcasts/{display_id}/view',
                                   display_id, fatal=False) or {})

        formats, subtitles = [], {}
        # Read the playlist defensively: a recorded view without a playlist
        # should end up with no formats rather than a bare KeyError.
        playlist_url = view_json_data.get('playlist')
        if view_json_data.get('status') == 'recorded' and playlist_url:
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(
                playlist_url, display_id)

        return {
            # 'id' is mandatory, so a missing key is allowed to fail hard here.
            'id': str(broadcast_json_data['id']),
            'title': (broadcast_json_data.get('name')
                      or self._html_search_meta(['og:title', 'twitter:title'], webpage)),
            # Trailing `or None` normalizes an empty description to None.
            'description': (broadcast_json_data.get('description')
                            or self._html_search_meta(['og:description', 'twitter:description'], webpage)
                            or None),
            'thumbnail': (broadcast_json_data.get('preview')
                          or self._html_search_meta(['og:image', 'twitter:image'], webpage)),
            'formats': formats,
            'subtitles': subtitles,
            'release_timestamp': unified_timestamp(broadcast_json_data.get('streamed_at')),
            'uploader': broadcast_json_data.get('account_name'),
            'uploader_id': broadcast_json_data.get('account_id'),
        }