[ie/dplay] Fix extractors (#10471)
[yt-dlp3.git] / yt_dlp / extractor / crowdbunker.py
blobbf814570fec93497cf5ca4d5c39aa550bf74626e
1 import itertools
3 from .common import InfoExtractor
4 from ..utils import (
5 int_or_none,
6 try_get,
7 unified_strdate,
class CrowdBunkerIE(InfoExtractor):
    """Extractor for single CrowdBunker video pages (``crowdbunker.com/v/<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?crowdbunker\.com/v/(?P<id>[^/?#$&]+)'

    _TESTS = [{
        'url': 'https://crowdbunker.com/v/0z4Kms8pi8I',
        'info_dict': {
            'id': '0z4Kms8pi8I',
            'ext': 'mp4',
            'title': '117) Pass vax et solutions',
            'description': 'md5:86bcb422c29475dbd2b5dcfa6ec3749c',
            'view_count': int,
            'duration': 5386,
            'uploader': 'Jérémie Mercier',
            'uploader_id': 'UCeN_qQV829NYf0pvPJhW5dQ',
            'like_count': int,
            'upload_date': '20211218',
            'thumbnail': 'https://scw.divulg.org/cb-medias4/images/0z4Kms8pi8I/maxres.jpg',
        },
        'params': {'skip_download': True},
    }]

    def _real_extract(self, url):
        """Fetch the post details for *url* and build the info dict.

        The video id is taken from the URL, the post details are downloaded
        from the ``api.divulg.org`` endpoint, and formats are collected from
        the DASH and HLS manifests referenced by the ``video`` object.
        Raises ``KeyError`` if the response has no ``video`` key.
        """
        video_id = self._match_id(url)
        data_json = self._download_json(
            f'https://api.divulg.org/post/{video_id}/details', video_id,
            headers={'accept': 'application/json, text/plain, */*'})
        video_json = data_json['video']
        formats, subtitles = [], {}

        # Standalone caption files listed in the API response; entries
        # without a file URL are skipped, and the language falls back to 'fr'.
        for sub in video_json.get('captions') or []:
            sub_url = try_get(sub, lambda x: x['file']['url'])
            if not sub_url:
                continue
            subtitles.setdefault(sub.get('languageCode', 'fr'), []).append({
                'url': sub_url,
            })

        mpd_url = try_get(video_json, lambda x: x['dashManifest']['url'])
        if mpd_url:
            fmts, subs = self._extract_mpd_formats_and_subtitles(mpd_url, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)

        m3u8_url = try_get(video_json, lambda x: x['hlsManifest']['url'])
        if m3u8_url:
            # Fix: parse the HLS manifest itself. Previously the DASH URL
            # (possibly None) was passed to the HLS parser here.
            fmts, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)

        # Keep only thumbnail entries that actually carry a URL.
        thumbnails = [{
            'url': image['url'],
            'height': int_or_none(image.get('height')),
            'width': int_or_none(image.get('width')),
        } for image in video_json.get('thumbnails') or [] if image.get('url')]

        return {
            'id': video_id,
            'title': video_json.get('title'),
            'description': video_json.get('description'),
            'view_count': video_json.get('viewCount'),
            'duration': video_json.get('duration'),
            'uploader': try_get(data_json, lambda x: x['channel']['name']),
            'uploader_id': try_get(data_json, lambda x: x['channel']['id']),
            'like_count': data_json.get('likesCount'),
            # Prefer the publication date; fall back to the creation date.
            'upload_date': unified_strdate(video_json.get('publishedAt') or video_json.get('createdAt')),
            'thumbnails': thumbnails,
            'formats': formats,
            'subtitles': subtitles,
        }
class CrowdBunkerChannelIE(InfoExtractor):
    """Extractor for CrowdBunker channel pages (``crowdbunker.com/@<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?crowdbunker\.com/@(?P<id>[^/?#$&]+)'

    _TESTS = [{
        'url': 'https://crowdbunker.com/@Milan_UHRIN',
        'playlist_mincount': 14,
        'info_dict': {
            'id': 'Milan_UHRIN',
        },
    }]

    def _entries(self, playlist_id):
        """Yield a URL result for every post of the channel, page by page.

        Each response's ``last`` value is sent back as the ``after`` query
        parameter of the next request; iteration stops once a response
        carries no ``last`` value.
        """
        posts_url = f'https://api.divulg.org/organization/{playlist_id}/posts'
        request_headers = {'accept': 'application/json, text/plain, */*'}
        cursor = None
        page_num = 0

        while True:
            # The first request carries no cursor at all.
            params = {'after': cursor} if cursor else {}
            response = self._download_json(
                posts_url, playlist_id, headers=request_headers,
                query=params, note=f'Downloading Page {page_num}')

            for post in response.get('items') or []:
                uid = post.get('uid')
                if uid:
                    yield self.url_result(
                        f'https://crowdbunker.com/v/{uid}',
                        ie=CrowdBunkerIE.ie_key(), video_id=uid)

            cursor = response.get('last')
            if not cursor:
                return
            page_num += 1

    def _real_extract(self, url):
        """Return a playlist whose entries are the channel's posts."""
        channel_id = self._match_id(url)
        entries = self._entries(channel_id)
        return self.playlist_result(entries, playlist_id=channel_id)