[ie/dropout] Fix extraction (#12102)
[yt-dlp.git] / yt_dlp / extractor / prankcast.py
blob84e6f7ebcf8209045dba73e7a549368cafa776e6
1 import json
3 from .common import InfoExtractor
4 from ..utils import float_or_none, parse_iso8601, str_or_none, try_call
5 from ..utils.traversal import traverse_obj
class PrankCastIE(InfoExtractor):
    """Extractor for prankcast.com ``/showreel/`` recording pages."""

    _VALID_URL = r'https?://(?:www\.)?prankcast\.com/[^/?#]+/showreel/(?P<id>\d+)-(?P<display_id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://prankcast.com/Devonanustart/showreel/1561-Beverly-is-back-like-a-heart-attack-',
        'info_dict': {
            'id': '1561',
            'ext': 'mp3',
            'title': 'Beverly is back like a heart attack!',
            'display_id': 'Beverly-is-back-like-a-heart-attack-',
            'timestamp': 1661391575,
            'uploader': 'Devonanustart',
            'channel_id': '4',
            'duration': 7918,
            'cast': ['Devonanustart', 'Phonelosers'],
            'description': '',
            'categories': ['prank'],
            'tags': ['prank call', 'prank', 'live show'],
            'upload_date': '20220825',
        },
    }, {
        'url': 'https://prankcast.com/phonelosers/showreel/2048-NOT-COOL',
        'info_dict': {
            'id': '2048',
            'ext': 'mp3',
            'title': 'NOT COOL',
            'display_id': 'NOT-COOL',
            'timestamp': 1665028364,
            'uploader': 'phonelosers',
            'channel_id': '6',
            'duration': 4044,
            'cast': ['phonelosers'],
            'description': '',
            'categories': ['prank'],
            'tags': ['prank call', 'prank', 'live show'],
            'upload_date': '20221006',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for a showreel page.

        The page's Next.js data is read from the downloaded webpage and the
        ``ssr_data_showreel`` object inside it supplies every field. The
        media URL is ``broadcast_url`` + ``recording_hash`` + ``.mp3``; both
        keys are required, so a missing one raises ``KeyError``.
        """
        video_id, display_id = self._match_valid_url(url).group('id', 'display_id')

        webpage = self._download_webpage(url, video_id)
        json_info = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['ssr_data_showreel']

        uploader = json_info.get('user_name')
        # Guests are optional metadata: a malformed blob must not abort the
        # extraction, so parse non-fatally and fall back to "no guests".
        guests_json = self._parse_json(
            json_info.get('guests_json') or '{}', video_id, fatal=False) or {}
        start_date = parse_iso8601(json_info.get('start_date'))

        return {
            'id': video_id,
            'title': json_info.get('broadcast_title') or self._og_search_title(webpage),
            'display_id': display_id,
            'url': f'{json_info["broadcast_url"]}{json_info["recording_hash"]}.mp3',
            'timestamp': start_date,
            'uploader': uploader,
            'channel_id': str_or_none(json_info.get('user_id')),
            # Duration is end minus start; try_call yields None if either is missing.
            'duration': try_call(lambda: parse_iso8601(json_info['end_date']) - start_date),
            'cast': list(filter(None, [uploader, *traverse_obj(guests_json, (..., 'name'))])),
            'description': json_info.get('broadcast_description'),
            # Drop a missing category instead of emitting [None].
            'categories': list(filter(None, [json_info.get('broadcast_category')])),
            # Drop empty entries produced by stray or trailing commas.
            'tags': try_call(lambda: list(filter(None, json_info['broadcast_tags'].split(',')))),
        }
class PrankCastPostIE(InfoExtractor):
    """Extractor for prankcast.com ``/posts/`` pages."""

    _VALID_URL = r'https?://(?:www\.)?prankcast\.com/[^/?#]+/posts/(?P<id>\d+)-(?P<display_id>[^/?#]+)'
    # NOTE(review): 'tags' used to always come back as None (see _real_extract),
    # so these definitions carry no 'tags' key; they may need one now that tags
    # are extracted -- confirm against the live pages.
    _TESTS = [{
        'url': 'https://prankcast.com/devonanustart/posts/6214-happy-national-rachel-day-',
        'info_dict': {
            'id': '6214',
            'ext': 'mp3',
            'title': 'Happy National Rachel Day!',
            'display_id': 'happy-national-rachel-day-',
            'timestamp': 1704333938,
            'uploader': 'Devonanustart',
            'channel_id': '4',
            'duration': 13175,
            'cast': ['Devonanustart'],
            'description': '',
            'categories': ['prank call'],
            'upload_date': '20240104',
        },
    }, {
        'url': 'https://prankcast.com/despicabledogs/posts/6217-jake-the-work-crow-',
        'info_dict': {
            'id': '6217',
            'ext': 'mp3',
            'title': 'Jake the Work Crow!',
            'display_id': 'jake-the-work-crow-',
            'timestamp': 1704346592,
            'uploader': 'despicabledogs',
            'channel_id': '957',
            'duration': 263.287,
            'cast': ['despicabledogs'],
            'description': 'https://imgur.com/a/vtxLvKU',
            'categories': [],
            'upload_date': '20240104',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for a post page.

        The page's Next.js data is read from the downloaded webpage; its
        ``ssr_data_posts`` object holds the post, and the first element of the
        JSON-encoded ``post_contents_json`` list describes the media item.
        """
        video_id, display_id = self._match_valid_url(url).group('id', 'display_id')

        webpage = self._download_webpage(url, video_id)
        post = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['ssr_data_posts']
        content = self._parse_json(post['post_contents_json'], video_id)[0]

        uploader = post.get('user_name')
        # guests_json is itself a JSON string; anything that does not decode
        # to a dict is treated as "no guests".
        guests_json = traverse_obj(content, ('guests_json', {json.loads}, {dict})) or {}

        return {
            'id': video_id,
            'title': post.get('post_title') or self._og_search_title(webpage),
            'display_id': display_id,
            'url': content.get('url'),
            # The ' ' argument is the date/time delimiter passed to parse_iso8601.
            'timestamp': parse_iso8601(content.get('start_date') or content.get('crdate'), ' '),
            'uploader': uploader,
            'channel_id': str_or_none(post.get('user_id')),
            'duration': float_or_none(content.get('duration')),
            'cast': list(filter(None, [uploader, *traverse_obj(guests_json, (..., 'name'))])),
            'description': post.get('post_body'),
            'categories': list(filter(None, [content.get('category')])),
            # filter() needs None (identity) as the predicate: passing '' made
            # every call raise TypeError, which try_call swallowed, so tags
            # were always None.
            'tags': try_call(lambda: list(filter(None, post['post_tags'].split(',')))),
            'subtitles': {
                'live_chat': [{
                    'url': f'https://prankcast.com/api/private/chat/select-broadcast?id={post["content_id"]}&cache=',
                    'ext': 'json',
                }],
            } if post.get('content_id') else None,
        }