yt_dlp/extractor/redgifs.py

   1 import functools
   2 import urllib.parse
   3
   4 from .common import InfoExtractor
   5 from ..networking.exceptions import HTTPError
   6 from ..utils import (
   7     ExtractorError,
   8     OnDemandPagedList,
   9     int_or_none,
  10     qualities,
  11     try_get,
  12 )
  13
  14
  15 class RedGifsBaseInfoExtractor(InfoExtractor):
  16     _FORMATS = {
  17         'gif': 250,
  18         'sd': 480,
  19         'hd': None,
  20     }
  21
  22     _API_HEADERS = {
  23         'referer': 'https://www.redgifs.com/',
  24         'origin': 'https://www.redgifs.com',
  25         'content-type': 'application/json',
  26     }
  27
  28     def _parse_gif_data(self, gif_data):
  29         video_id = gif_data.get('id')
  30         quality = qualities(tuple(self._FORMATS.keys()))
  31
  32         orig_height = int_or_none(gif_data.get('height'))
  33         aspect_ratio = try_get(gif_data, lambda x: orig_height / x['width'])
  34
  35         formats = []
  36         for format_id, height in self._FORMATS.items():
  37             video_url = gif_data['urls'].get(format_id)
  38             if not video_url:
  39                 continue
  40             height = min(orig_height, height or orig_height)
  41             formats.append({
  42                 'url': video_url,
  43                 'format_id': format_id,
  44                 'width': height * aspect_ratio if aspect_ratio else None,
  45                 'height': height,
  46                 'quality': quality(format_id),
  47             })
  48
  49         return {
  50             'id': video_id,
  51             'webpage_url': f'https://redgifs.com/watch/{video_id}',
  52             'extractor_key': RedGifsIE.ie_key(),
  53             'extractor': 'RedGifs',
  54             'title': ' '.join(gif_data.get('tags') or []) or 'RedGifs',
  55             'timestamp': int_or_none(gif_data.get('createDate')),
  56             'uploader': gif_data.get('userName'),
  57             'duration': int_or_none(gif_data.get('duration')),
  58             'view_count': int_or_none(gif_data.get('views')),
  59             'like_count': int_or_none(gif_data.get('likes')),
  60             'categories': gif_data.get('tags') or [],
  61             'tags': gif_data.get('tags'),
  62             'age_limit': 18,
  63             'formats': formats,
  64         }
  65
  66     def _fetch_oauth_token(self, video_id):
  67         # https://github.com/Redgifs/api/wiki/Temporary-tokens
  68         auth = self._download_json('https://api.redgifs.com/v2/auth/temporary',
  69                                    video_id, note='Fetching temporary token')
  70         if not auth.get('token'):
  71             raise ExtractorError('Unable to get temporary token')
  72         self._API_HEADERS['authorization'] = f'Bearer {auth["token"]}'
  73
  74     def _call_api(self, ep, video_id, **kwargs):
  75         for first_attempt in True, False:
  76             if 'authorization' not in self._API_HEADERS:
  77                 self._fetch_oauth_token(video_id)
  78             try:
  79                 headers = dict(self._API_HEADERS)
  80                 headers['x-customheader'] = f'https://www.redgifs.com/watch/{video_id}'
  81                 data = self._download_json(
  82                     f'https://api.redgifs.com/v2/{ep}', video_id, headers=headers, **kwargs)
  83                 break
  84             except ExtractorError as e:
  85                 if first_attempt and isinstance(e.cause, HTTPError) and e.cause.status == 401:
  86                     del self._API_HEADERS['authorization']  # refresh the token
  87                     continue
  88                 raise
  89
  90         if 'error' in data:
  91             raise ExtractorError(f'RedGifs said: {data["error"]}', expected=True, video_id=video_id)
  92         return data
  93
  94     def _fetch_page(self, ep, video_id, query, page):
  95         query['page'] = page + 1
  96         data = self._call_api(
  97             ep, video_id, query=query, note=f'Downloading JSON metadata page {page + 1}')
  98
  99         for entry in data['gifs']:
 100             yield self._parse_gif_data(entry)
 101
 102     def _prepare_api_query(self, query, fields):
 103         api_query = [
 104             (field_name, query.get(field_name, (default,))[0])
 105             for field_name, default in fields.items()]
 106
 107         return {key: val for key, val in api_query if val is not None}
 108
 109     def _paged_entries(self, ep, item_id, query, fields):
 110         page = int_or_none(query.get('page', (None,))[0])
 111         page_fetcher = functools.partial(
 112             self._fetch_page, ep, item_id, self._prepare_api_query(query, fields))
 113         return page_fetcher(page) if page else OnDemandPagedList(page_fetcher, self._PAGE_SIZE)
 114
 115
 116 class RedGifsIE(RedGifsBaseInfoExtractor):
 117     _VALID_URL = r'https?://(?:(?:www\.)?redgifs\.com/watch/|thumbs2\.redgifs\.com/)(?P<id>[^-/?#\.]+)'
 118     _TESTS = [{
 119         'url': 'https://www.redgifs.com/watch/squeakyhelplesswisent',
 120         'info_dict': {
 121             'id': 'squeakyhelplesswisent',
 122             'ext': 'mp4',
 123             'title': 'Hotwife Legs Thick',
 124             'timestamp': 1636287915,
 125             'upload_date': '20211107',
 126             'uploader': 'ignored52',
 127             'duration': 16,
 128             'view_count': int,
 129             'like_count': int,
 130             'categories': list,
 131             'age_limit': 18,
 132             'tags': list,
 133         },
 134     }, {
 135         'url': 'https://thumbs2.redgifs.com/SqueakyHelplessWisent-mobile.mp4#t=0',
 136         'info_dict': {
 137             'id': 'squeakyhelplesswisent',
 138             'ext': 'mp4',
 139             'title': 'Hotwife Legs Thick',
 140             'timestamp': 1636287915,
 141             'upload_date': '20211107',
 142             'uploader': 'ignored52',
 143             'duration': 16,
 144             'view_count': int,
 145             'like_count': int,
 146             'categories': list,
 147             'age_limit': 18,
 148             'tags': list,
 149         },
 150     }]
 151
 152     def _real_extract(self, url):
 153         video_id = self._match_id(url).lower()
 154         video_info = self._call_api(
 155             f'gifs/{video_id}?views=yes', video_id, note='Downloading video info')
 156         return self._parse_gif_data(video_info['gif'])
 157
 158
 159 class RedGifsSearchIE(RedGifsBaseInfoExtractor):
 160     IE_DESC = 'Redgifs search'
 161     _VALID_URL = r'https?://(?:www\.)?redgifs\.com/browse\?(?P<query>[^#]+)'
 162     _PAGE_SIZE = 80
 163     _TESTS = [
 164         {
 165             'url': 'https://www.redgifs.com/browse?tags=Lesbian',
 166             'info_dict': {
 167                 'id': 'tags=Lesbian',
 168                 'title': 'Lesbian',
 169                 'description': 'RedGifs search for Lesbian, ordered by trending',
 170             },
 171             'playlist_mincount': 100,
 172         },
 173         {
 174             'url': 'https://www.redgifs.com/browse?type=g&order=latest&tags=Lesbian',
 175             'info_dict': {
 176                 'id': 'type=g&order=latest&tags=Lesbian',
 177                 'title': 'Lesbian',
 178                 'description': 'RedGifs search for Lesbian, ordered by latest',
 179             },
 180             'playlist_mincount': 100,
 181         },
 182         {
 183             'url': 'https://www.redgifs.com/browse?type=g&order=latest&tags=Lesbian&page=2',
 184             'info_dict': {
 185                 'id': 'type=g&order=latest&tags=Lesbian&page=2',
 186                 'title': 'Lesbian',
 187                 'description': 'RedGifs search for Lesbian, ordered by latest',
 188             },
 189             'playlist_count': 80,
 190         },
 191     ]
 192
 193     def _real_extract(self, url):
 194         query_str = self._match_valid_url(url).group('query')
 195         query = urllib.parse.parse_qs(query_str)
 196         if not query.get('tags'):
 197             raise ExtractorError('Invalid query tags', expected=True)
 198
 199         tags = query.get('tags')[0]
 200         order = query.get('order', ('trending',))[0]
 201
 202         query['search_text'] = [tags]
 203         entries = self._paged_entries('gifs/search', query_str, query, {
 204             'search_text': None,
 205             'order': 'trending',
 206             'type': None,
 207         })
 208
 209         return self.playlist_result(
 210             entries, query_str, tags, f'RedGifs search for {tags}, ordered by {order}')
 211
 212
 213 class RedGifsUserIE(RedGifsBaseInfoExtractor):
 214     IE_DESC = 'Redgifs user'
 215     _VALID_URL = r'https?://(?:www\.)?redgifs\.com/users/(?P<username>[^/?#]+)(?:\?(?P<query>[^#]+))?'
 216     _PAGE_SIZE = 80
 217     _TESTS = [
 218         {
 219             'url': 'https://www.redgifs.com/users/lamsinka89',
 220             'info_dict': {
 221                 'id': 'lamsinka89',
 222                 'title': 'lamsinka89',
 223                 'description': 'RedGifs user lamsinka89, ordered by recent',
 224             },
 225             'playlist_mincount': 391,
 226         },
 227         {
 228             'url': 'https://www.redgifs.com/users/lamsinka89?page=3',
 229             'info_dict': {
 230                 'id': 'lamsinka89?page=3',
 231                 'title': 'lamsinka89',
 232                 'description': 'RedGifs user lamsinka89, ordered by recent',
 233             },
 234             'playlist_count': 80,
 235         },
 236         {
 237             'url': 'https://www.redgifs.com/users/lamsinka89?order=best&type=g',
 238             'info_dict': {
 239                 'id': 'lamsinka89?order=best&type=g',
 240                 'title': 'lamsinka89',
 241                 'description': 'RedGifs user lamsinka89, ordered by best',
 242             },
 243             'playlist_mincount': 391,
 244         },
 245         {
 246             'url': 'https://www.redgifs.com/users/ignored52',
 247             'note': 'https://github.com/yt-dlp/yt-dlp/issues/7382',
 248             'info_dict': {
 249                 'id': 'ignored52',
 250                 'title': 'ignored52',
 251                 'description': 'RedGifs user ignored52, ordered by recent',
 252             },
 253             'playlist_mincount': 121,
 254         },
 255     ]
 256
 257     def _real_extract(self, url):
 258         username, query_str = self._match_valid_url(url).group('username', 'query')
 259         playlist_id = f'{username}?{query_str}' if query_str else username
 260
 261         query = urllib.parse.parse_qs(query_str)
 262         order = query.get('order', ('recent',))[0]
 263
 264         entries = self._paged_entries(f'users/{username}/search', playlist_id, query, {
 265             'order': 'recent',
 266             'type': None,
 267         })
 268
 269         return self.playlist_result(
 270             entries, playlist_id, username, f'RedGifs user {username}, ordered by {order}')