yt_dlp/extractor/veoh.py

   1 import functools
   2 import json
   3
   4 from .common import InfoExtractor
   5 from ..utils import (
   6     ExtractorError,
   7     OnDemandPagedList,
   8     int_or_none,
   9     parse_duration,
  10     qualities,
  11     try_get,
  12 )
  13
  14
  15 class VeohIE(InfoExtractor):
  16     _VALID_URL = r'https?://(?:www\.)?veoh\.com/(?:watch|videos|embed|iphone/#_Watch)/(?P<id>(?:v|e|yapi-)[\da-zA-Z]+)'
  17
  18     _TESTS = [{
  19         'url': 'http://www.veoh.com/watch/v56314296nk7Zdmz3',
  20         'md5': '620e68e6a3cff80086df3348426c9ca3',
  21         'info_dict': {
  22             'id': 'v56314296nk7Zdmz3',
  23             'ext': 'mp4',
  24             'title': 'Straight Backs Are Stronger',
  25             'description': 'md5:203f976279939a6dc664d4001e13f5f4',
  26             'thumbnail': 're:https://fcache\\.veoh\\.com/file/f/th56314296\\.jpg(\\?.*)?',
  27             'uploader': 'LUMOback',
  28             'duration': 46,
  29             'view_count': int,
  30             'average_rating': int,
  31             'comment_count': int,
  32             'age_limit': 0,
  33             'categories': ['technology_and_gaming'],
  34             'tags': ['posture', 'posture', 'sensor', 'back', 'pain', 'wearable', 'tech', 'lumo'],
  35         },
  36     }, {
  37         'url': 'http://www.veoh.com/embed/v56314296nk7Zdmz3',
  38         'only_matching': True,
  39     }, {
  40         'url': 'http://www.veoh.com/watch/v27701988pbTc4wzN?h1=Chile+workers+cover+up+to+avoid+skin+damage',
  41         'md5': '4a6ff84b87d536a6a71e6aa6c0ad07fa',
  42         'info_dict': {
  43             'id': '27701988',
  44             'ext': 'mp4',
  45             'title': 'Chile workers cover up to avoid skin damage',
  46             'description': 'md5:2bd151625a60a32822873efc246ba20d',
  47             'uploader': 'afp-news',
  48             'duration': 123,
  49         },
  50         'skip': 'This video has been deleted.',
  51     }, {
  52         'url': 'http://www.veoh.com/watch/v69525809F6Nc4frX',
  53         'md5': '4fde7b9e33577bab2f2f8f260e30e979',
  54         'note': 'Embedded ooyala video',
  55         'info_dict': {
  56             'id': '69525809',
  57             'ext': 'mp4',
  58             'title': 'Doctors Alter Plan For Preteen\'s Weight Loss Surgery',
  59             'description': 'md5:f5a11c51f8fb51d2315bca0937526891',
  60             'uploader': 'newsy-videos',
  61         },
  62         'skip': 'This video has been deleted.',
  63     }, {
  64         'url': 'http://www.veoh.com/watch/e152215AJxZktGS',
  65         'only_matching': True,
  66     }, {
  67         'url': 'https://www.veoh.com/videos/v16374379WA437rMH',
  68         'md5': 'cceb73f3909063d64f4b93d4defca1b3',
  69         'info_dict': {
  70             'id': 'v16374379WA437rMH',
  71             'ext': 'mp4',
  72             'title': 'Phantasmagoria 2, pt. 1-3',
  73             'description': 'Phantasmagoria: a Puzzle of Flesh',
  74             'thumbnail': 're:https://fcache\\.veoh\\.com/file/f/th16374379\\.jpg(\\?.*)?',
  75             'uploader': 'davidspackage',
  76             'duration': 968,
  77             'view_count': int,
  78             'average_rating': int,
  79             'comment_count': int,
  80             'age_limit': 18,
  81             'categories': ['technology_and_gaming', 'gaming'],
  82             'tags': ['puzzle', 'of', 'flesh'],
  83         },
  84     }]
  85
  86     def _real_extract(self, url):
  87         video_id = self._match_id(url)
  88         metadata = self._download_json(
  89             'https://www.veoh.com/watch/getVideo/' + video_id,
  90             video_id)
  91         video = metadata['video']
  92         title = video['title']
  93
  94         thumbnail_url = None
  95         q = qualities(['Regular', 'HQ'])
  96         formats = []
  97         for f_id, f_url in video.get('src', {}).items():
  98             if not f_url:
  99                 continue
 100             if f_id == 'poster':
 101                 thumbnail_url = f_url
 102             else:
 103                 formats.append({
 104                     'format_id': f_id,
 105                     'quality': q(f_id),
 106                     'url': f_url,
 107                 })
 108
 109         categories = metadata.get('categoryPath')
 110         if not categories:
 111             category = try_get(video, lambda x: x['category'].strip().removeprefix('category_'))
 112             categories = [category] if category else None
 113         tags = video.get('tags')
 114
 115         return {
 116             'id': video_id,
 117             'title': title,
 118             'description': video.get('description'),
 119             'thumbnail': thumbnail_url,
 120             'uploader': video.get('author', {}).get('nickname'),
 121             'duration': int_or_none(video.get('lengthBySec')) or parse_duration(video.get('length')),
 122             'view_count': int_or_none(video.get('views')),
 123             'formats': formats,
 124             'average_rating': int_or_none(video.get('rating')),
 125             'comment_count': int_or_none(video.get('numOfComments')),
 126             'age_limit': 18 if video.get('contentRatingId') == 2 else 0,
 127             'categories': categories,
 128             'tags': tags.split(', ') if tags else None,
 129         }
 130
 131
 132 class VeohUserIE(VeohIE):  # XXX: Do not subclass from concrete IE
 133     _VALID_URL = r'https?://(?:www\.)?veoh\.com/users/(?P<id>[\w-]+)'
 134     IE_NAME = 'veoh:user'
 135
 136     _TESTS = [
 137         {
 138             'url': 'https://www.veoh.com/users/valentinazoe',
 139             'info_dict': {
 140                 'id': 'valentinazoe',
 141                 'title': 'valentinazoe (Uploads)',
 142             },
 143             'playlist_mincount': 75,
 144         },
 145         {
 146             'url': 'https://www.veoh.com/users/PiensaLibre',
 147             'info_dict': {
 148                 'id': 'PiensaLibre',
 149                 'title': 'PiensaLibre (Uploads)',
 150             },
 151             'playlist_mincount': 2,
 152         }]
 153
 154     _PAGE_SIZE = 16
 155
 156     def _fetch_page(self, uploader, page):
 157         response = self._download_json(
 158             'https://www.veoh.com/users/published/videos', uploader,
 159             note=f'Downloading videos page {page + 1}',
 160             headers={
 161                 'x-csrf-token': self._TOKEN,
 162                 'content-type': 'application/json;charset=UTF-8',
 163             },
 164             data=json.dumps({
 165                 'username': uploader,
 166                 'maxResults': self._PAGE_SIZE,
 167                 'page': page + 1,
 168                 'requestName': 'userPage',
 169             }).encode())
 170         if not response.get('success'):
 171             raise ExtractorError(response['message'])
 172
 173         for video in response['videos']:
 174             yield self.url_result(f'https://www.veoh.com/watch/{video["permalinkId"]}', VeohIE,
 175                                   video['permalinkId'], video.get('title'))
 176
 177     def _real_initialize(self):
 178         webpage = self._download_webpage(
 179             'https://www.veoh.com', None, note='Downloading authorization token')
 180         self._TOKEN = self._search_regex(
 181             r'csrfToken:\s*(["\'])(?P<token>[0-9a-zA-Z]{40})\1', webpage,
 182             'request token', group='token')
 183
 184     def _real_extract(self, url):
 185         uploader = self._match_id(url)
 186         return self.playlist_result(OnDemandPagedList(
 187             functools.partial(self._fetch_page, uploader),
 188             self._PAGE_SIZE), uploader, f'{uploader} (Uploads)')