yt_dlp/extractor/netverse.py

   1 import itertools
   2
   3 from .common import InfoExtractor, SearchInfoExtractor
   4 from .dailymotion import DailymotionIE
   5 from ..utils import smuggle_url, traverse_obj
   6
   7
   8 class NetverseBaseIE(InfoExtractor):
   9     _ENDPOINTS = {
  10         'watch': 'watchvideo',
  11         'video': 'watchvideo',
  12         'webseries': 'webseries',
  13         'season': 'webseason_videos',
  14     }
  15
  16     def _call_api(self, slug, endpoint, query={}, season_id='', display_id=None):
  17         return self._download_json(
  18             f'https://api.netverse.id/medias/api/v2/{self._ENDPOINTS[endpoint]}/{slug}/{season_id}',
  19             display_id or slug, query=query)
  20
  21     def _get_comments(self, video_id):
  22         last_page_number = None
  23         for i in itertools.count(1):
  24             comment_data = self._download_json(
  25                 f'https://api.netverse.id/mediadetails/api/v3/videos/comments/{video_id}',
  26                 video_id, data=b'', fatal=False, query={'page': i},
  27                 note=f'Downloading JSON comment metadata page {i}') or {}
  28             yield from traverse_obj(comment_data, ('response', 'comments', 'data', ..., {
  29                 'id': '_id',
  30                 'text': 'comment',
  31                 'author_id': 'customer_id',
  32                 'author': ('customer', 'name'),
  33                 'author_thumbnail': ('customer', 'profile_picture'),
  34             }))
  35
  36             if not last_page_number:
  37                 last_page_number = traverse_obj(comment_data, ('response', 'comments', 'last_page'))
  38             if i >= (last_page_number or 0):
  39                 break
  40
  41
  42 class NetverseIE(NetverseBaseIE):
  43     _VALID_URL = r'https?://(?:\w+\.)?netverse\.id/(?P<type>watch|video)/(?P<display_id>[^/?#&]+)'
  44     _TESTS = [{
  45         # Watch video
  46         'url': 'https://www.netverse.id/watch/waktu-indonesia-bercanda-edisi-spesial-lebaran-2016',
  47         'info_dict': {
  48             'id': 'k4yhqUwINAGtmHx3NkL',
  49             'title': 'Waktu Indonesia Bercanda - Edisi Spesial Lebaran 2016',
  50             'ext': 'mp4',
  51             'season': 'Season 2016',
  52             'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
  53             'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
  54             'episode_number': 22,
  55             'episode': 'Episode 22',
  56             'uploader_id': 'x2ir3vq',
  57             'age_limit': 0,
  58             'tags': [],
  59             'view_count': int,
  60             'display_id': 'waktu-indonesia-bercanda-edisi-spesial-lebaran-2016',
  61             'duration': 2990,
  62             'upload_date': '20210722',
  63             'timestamp': 1626919804,
  64             'like_count': int,
  65             'uploader': 'Net Prime',
  66         },
  67     }, {
  68         # series
  69         'url': 'https://www.netverse.id/watch/jadoo-seorang-model',
  70         'info_dict': {
  71             'id': 'x88izwc',
  72             'title': 'Jadoo Seorang Model',
  73             'ext': 'mp4',
  74             'season': 'Season 2',
  75             'description': 'md5:8a74f70812cca267e19ee0635f0af835',
  76             'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
  77             'episode_number': 2,
  78             'episode': 'Episode 2',
  79             'view_count': int,
  80             'like_count': int,
  81             'display_id': 'jadoo-seorang-model',
  82             'uploader_id': 'x2ir3vq',
  83             'duration': 635,
  84             'timestamp': 1646372927,
  85             'tags': ['PG069497-hellojadooseason2eps2'],
  86             'upload_date': '20220304',
  87             'uploader': 'Net Prime',
  88             'age_limit': 0,
  89         },
  90         'skip': 'video get Geo-blocked for some country',
  91     }, {
  92         # non www host
  93         'url': 'https://netverse.id/watch/tetangga-baru',
  94         'info_dict': {
  95             'id': 'k4CNGz7V0HJ7vfwZbXy',
  96             'ext': 'mp4',
  97             'title': 'Tetangga Baru',
  98             'season': 'Season 1',
  99             'description': 'md5:23fcf70e97d461d3029d25d59b2ccfb9',
 100             'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
 101             'episode_number': 1,
 102             'episode': 'Episode 1',
 103             'timestamp': 1624538169,
 104             'view_count': int,
 105             'upload_date': '20210624',
 106             'age_limit': 0,
 107             'uploader_id': 'x2ir3vq',
 108             'like_count': int,
 109             'uploader': 'Net Prime',
 110             'tags': ['PG008534', 'tetangga', 'Baru'],
 111             'display_id': 'tetangga-baru',
 112             'duration': 1406,
 113         },
 114     }, {
 115         # /video url
 116         'url': 'https://www.netverse.id/video/pg067482-hellojadoo-season1',
 117         'title': 'Namaku Choi Jadoo',
 118         'info_dict': {
 119             'id': 'x887jzz',
 120             'ext': 'mp4',
 121             'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
 122             'season': 'Season 1',
 123             'episode_number': 1,
 124             'description': 'md5:d4f627b3e7a3f9acdc55f6cdd5ea41d5',
 125             'title': 'Namaku Choi Jadoo',
 126             'episode': 'Episode 1',
 127             'age_limit': 0,
 128             'like_count': int,
 129             'view_count': int,
 130             'tags': ['PG067482', 'PG067482-HelloJadoo-season1'],
 131             'duration': 780,
 132             'display_id': 'pg067482-hellojadoo-season1',
 133             'uploader_id': 'x2ir3vq',
 134             'uploader': 'Net Prime',
 135             'timestamp': 1645764984,
 136             'upload_date': '20220225',
 137         },
 138         'skip': 'This video get Geo-blocked for some country',
 139     }, {
 140         # video with comments
 141         'url': 'https://netverse.id/video/episode-1-season-2016-ok-food',
 142         'info_dict': {
 143             'id': 'k6hetBPiQMljSxxvAy7',
 144             'ext': 'mp4',
 145             'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
 146             'display_id': 'episode-1-season-2016-ok-food',
 147             'like_count': int,
 148             'description': '',
 149             'duration': 1471,
 150             'age_limit': 0,
 151             'timestamp': 1642405848,
 152             'episode_number': 1,
 153             'season': 'Season 2016',
 154             'uploader_id': 'x2ir3vq',
 155             'title': 'Episode 1 - Season 2016 - Ok Food',
 156             'upload_date': '20220117',
 157             'tags': [],
 158             'view_count': int,
 159             'episode': 'Episode 1',
 160             'uploader': 'Net Prime',
 161             'comment_count': int,
 162         },
 163         'params': {
 164             'getcomments': True,
 165         },
 166     }, {
 167         # video with multiple page comment
 168         'url': 'https://netverse.id/video/match-island-eps-1-fix',
 169         'info_dict': {
 170             'id': 'x8aznjc',
 171             'ext': 'mp4',
 172             'like_count': int,
 173             'tags': ['Match-Island', 'Pd00111'],
 174             'display_id': 'match-island-eps-1-fix',
 175             'view_count': int,
 176             'episode': 'Episode 1',
 177             'uploader': 'Net Prime',
 178             'duration': 4070,
 179             'timestamp': 1653068165,
 180             'description': 'md5:e9cf3b480ad18e9c33b999e3494f223f',
 181             'age_limit': 0,
 182             'title': 'Welcome To Match Island',
 183             'upload_date': '20220520',
 184             'episode_number': 1,
 185             'thumbnail': r're:https?://s\d+\.dmcdn\.net/v/[^/]+/x1080',
 186             'uploader_id': 'x2ir3vq',
 187             'season': 'Season 1',
 188             'comment_count': int,
 189         },
 190         'params': {
 191             'getcomments': True,
 192         },
 193     }]
 194
 195     def _real_extract(self, url):
 196         display_id, sites_type = self._match_valid_url(url).group('display_id', 'type')
 197         program_json = self._call_api(display_id, sites_type)
 198         videos = program_json['response']['videos']
 199
 200         return {
 201             '_type': 'url_transparent',
 202             'ie_key': DailymotionIE.ie_key(),
 203             'url': smuggle_url(videos['dailymotion_url'], {'query': {'embedder': 'https://www.netverse.id'}}),
 204             'display_id': display_id,
 205             'title': videos.get('title'),
 206             'season': videos.get('season_name'),
 207             'thumbnail': traverse_obj(videos, ('program_detail', 'thumbnail_image')),
 208             'description': traverse_obj(videos, ('program_detail', 'description')),
 209             'episode_number': videos.get('episode_order'),
 210             '__post_extractor': self.extract_comments(display_id),
 211         }
 212
 213
 214 class NetversePlaylistIE(NetverseBaseIE):
 215     _VALID_URL = r'https?://(?:\w+\.)?netverse\.id/(?P<type>webseries)/(?P<display_id>[^/?#&]+)'
 216     _TESTS = [{
 217         # multiple season
 218         'url': 'https://netverse.id/webseries/tetangga-masa-gitu',
 219         'info_dict': {
 220             'id': 'tetangga-masa-gitu',
 221             'title': 'Tetangga Masa Gitu',
 222         },
 223         'playlist_count': 519,
 224     }, {
 225         # single season
 226         'url': 'https://netverse.id/webseries/kelas-internasional',
 227         'info_dict': {
 228             'id': 'kelas-internasional',
 229             'title': 'Kelas Internasional',
 230         },
 231         'playlist_count': 203,
 232     }]
 233
 234     def parse_playlist(self, json_data, playlist_id):
 235         slug_sample = traverse_obj(json_data, ('related', 'data', ..., 'slug'))[0]
 236         for season in traverse_obj(json_data, ('seasons', ..., 'id')):
 237             playlist_json = self._call_api(
 238                 slug_sample, 'season', display_id=playlist_id, season_id=season)
 239
 240             for current_page in range(playlist_json['response']['season_list']['last_page']):
 241                 playlist_json = self._call_api(slug_sample, 'season', query={'page': current_page + 1},
 242                                                season_id=season, display_id=playlist_id)
 243                 for slug in traverse_obj(playlist_json, ('response', ..., 'data', ..., 'slug')):
 244                     yield self.url_result(f'https://www.netverse.id/video/{slug}', NetverseIE)
 245
 246     def _real_extract(self, url):
 247         playlist_id, sites_type = self._match_valid_url(url).group('display_id', 'type')
 248         playlist_data = self._call_api(playlist_id, sites_type)
 249
 250         return self.playlist_result(
 251             self.parse_playlist(playlist_data['response'], playlist_id),
 252             traverse_obj(playlist_data, ('response', 'webseries_info', 'slug')),
 253             traverse_obj(playlist_data, ('response', 'webseries_info', 'title')))
 254
 255
 256 class NetverseSearchIE(SearchInfoExtractor):
 257     _SEARCH_KEY = 'netsearch'
 258
 259     _TESTS = [{
 260         'url': 'netsearch10:tetangga',
 261         'info_dict': {
 262             'id': 'tetangga',
 263             'title': 'tetangga',
 264         },
 265         'playlist_count': 10,
 266     }]
 267
 268     def _search_results(self, query):
 269         last_page = None
 270         for i in itertools.count(1):
 271             search_data = self._download_json(
 272                 'https://api.netverse.id/search/elastic/search', query,
 273                 query={'q': query, 'page': i}, note=f'Downloading page {i}')
 274
 275             videos = traverse_obj(search_data, ('response', 'data', ...))
 276             for video in videos:
 277                 yield self.url_result(f'https://netverse.id/video/{video["slug"]}', NetverseIE)
 278
 279             last_page = last_page or traverse_obj(search_data, ('response', 'lastpage'))
 280             if not videos or i >= (last_page or 0):
 281                 break