1 from .common
import InfoExtractor
12 class NoicePodcastIE(InfoExtractor
):
13 _VALID_URL
= r
'https?://open\.noice\.id/content/(?P<id>[a-fA-F0-9-]+)'
15 'url': 'https://open.noice.id/content/7694bb04-ff0f-40fa-a60b-5b39f29584b2',
17 'id': '7694bb04-ff0f-40fa-a60b-5b39f29584b2',
20 'description': 'md5:58d1274e6857b6fbbecf47075885380d',
21 'release_date': '20221115',
22 'timestamp': 1668496642,
24 'upload_date': '20221115',
25 'release_timestamp': 1668496642,
26 'title': 'Eps 1. Belajar dari Wishnutama: Kreatif Bukan Followers! (bersama Wishnutama)',
27 'modified_date': '20221121',
28 'categories': ['Bisnis dan Keuangan'],
30 'modified_timestamp': 1669030647,
31 'thumbnail': 'https://images.noiceid.cc/catalog/content-1668496302560',
32 'channel_id': '9dab1024-5b92-4265-ae1c-63da87359832',
34 'channel': 'Noice Space Talks',
37 'channel_follower_count': int,
40 'url': 'https://open.noice.id/content/222134e4-99f2-456f-b8a2-b8be404bf063',
42 'id': '222134e4-99f2-456f-b8a2-b8be404bf063',
44 'release_timestamp': 1653488220,
45 'description': 'md5:35074f6190cef52b05dd133bb2ef460e',
46 'upload_date': '20220525',
47 'timestamp': 1653460637,
48 'release_date': '20220525',
49 'thumbnail': 'https://images.noiceid.cc/catalog/content-1653460337625',
50 'title': 'Eps 1: Dijodohin Sama Anak Pak RT',
51 'modified_timestamp': 1669030647,
53 'modified_date': '20221121',
54 'categories': ['Cerita dan Drama'],
57 'channel_id': '60193f6b-d24d-4b23-913b-ceed5a731e74',
61 'channel': 'Dear Jerome',
62 'channel_follower_count': int,
def _get_formats_and_subtitles(self, media_url, video_id):
    """Build the formats list and subtitles mapping for one episode.

    ``media_url`` may be a single URL or a collection of URLs; it is
    passed through ``variadic`` so both shapes are iterated uniformly.

    URLs whose extension is ``m3u8`` are expanded through
    ``_extract_m3u8_formats_and_subtitles``; any other URL is added as a
    single direct format.

    Returns a ``(formats, subtitles)`` tuple, where ``formats`` is a list
    and ``subtitles`` is a dict merged from every manifest processed.
    """
    formats, subtitles = [], {}
    for url in variadic(media_url):
        if not url:
            # Nothing to fetch for a missing/empty entry.
            continue

        if determine_ext(url) == 'm3u8':
            fmts, subs = self._extract_m3u8_formats_and_subtitles(url, video_id)
            # Keep the parsed variants; previously they were discarded and
            # the method always returned an empty formats list.
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)
        else:
            # Not an HLS manifest: offer the URL itself as a format rather
            # than feeding it to the m3u8 parser.
            # NOTE(review): marked audio-only on the assumption that this
            # site serves podcasts only - confirm.
            formats.append({
                'url': url,
                'vcodec': 'none',
            })
    return formats, subtitles
83 def _real_extract(self
, url
):
84 display_id
= self
._match
_id
(url
)
85 webpage
= self
._download
_webpage
(url
, display_id
)
87 nextjs_data
= self
._search
_nextjs
_data
(webpage
, display_id
)['props']['pageProps']['contentDetails']
89 media_url_list
= traverse_obj(nextjs_data
, (('rawContentUrl', 'url'), ))
90 formats
, subtitles
= self
._get
_formats
_and
_subtitles
(media_url_list
, display_id
)
93 'id': nextjs_data
.get('id') or display_id
,
94 'title': nextjs_data
.get('title') or self
._html
_search
_meta
('og:title', webpage
),
96 'subtitles': subtitles
,
97 'description': (nextjs_data
.get('description') or clean_html(nextjs_data
.get('htmlDescription'))
98 or self
._html
_search
_meta
(['description', 'og:description'], webpage
)),
99 'thumbnail': nextjs_data
.get('image') or self
._html
_search
_meta
('og:image', webpage
),
100 'timestamp': parse_iso8601(nextjs_data
.get('createdAt')),
101 'release_timestamp': parse_iso8601(nextjs_data
.get('publishedAt')),
102 'modified_timestamp': parse_iso8601(
103 nextjs_data
.get('updatedAt') or self
._html
_search
_meta
('og:updated_time', webpage
)),
104 'duration': int_or_none(nextjs_data
.get('duration')),
105 'categories': traverse_obj(nextjs_data
, ('genres', ..., 'name')),
106 'season': nextjs_data
.get('seasonName'),
107 'season_number': int_or_none(nextjs_data
.get('seasonNumber')),
108 'channel': traverse_obj(nextjs_data
, ('catalog', 'title')),
109 'channel_id': traverse_obj(nextjs_data
, ('catalog', 'id'), 'catalogId'),
110 **traverse_obj(nextjs_data
, ('meta', 'aggregations', {
111 'like_count': 'likes',
112 'dislike_count': 'dislikes',
113 'comment_count': 'comments',
114 'channel_follower_count': 'followers',