[ie/dropout] Fix extraction (#12102)
[yt-dlp.git] / yt_dlp / extractor / redgifs.py
blob b11ea273dea343e1ffdee5c6e2a9ebc94b6c3237
1 import functools
2 import urllib.parse
4 from .common import InfoExtractor
5 from ..networking.exceptions import HTTPError
6 from ..utils import (
7 ExtractorError,
8 OnDemandPagedList,
9 int_or_none,
10 qualities,
11 try_get,
class RedGifsBaseInfoExtractor(InfoExtractor):
    """Shared helpers for the RedGifs extractors.

    Provides temporary-token handling, authenticated API calls, paging and
    conversion of an API "gif" object into an info dict.
    """

    # format_id -> height cap in pixels; None means "use the original height"
    _FORMATS = {
        'gif': 250,
        'sd': 480,
        'hd': None,
    }

    # Class-level dict: the 'authorization' entry added by _fetch_oauth_token
    # is stored here in place, so it is shared by every RedGifs extractor.
    _API_HEADERS = {
        'referer': 'https://www.redgifs.com/',
        'origin': 'https://www.redgifs.com',
        'content-type': 'application/json',
    }

    def _parse_gif_data(self, gif_data):
        """Build an info dict from a single API "gif" object.

        @param gif_data  dict as returned by the API; must contain 'urls'
        @returns         info dict with one format per available _FORMATS key
        """
        video_id = gif_data.get('id')
        quality = qualities(tuple(self._FORMATS.keys()))

        orig_height = int_or_none(gif_data.get('height'))
        orig_width = int_or_none(gif_data.get('width'))
        # Aspect ratio is width / height, so that width == height * ratio
        aspect_ratio = orig_width / orig_height if orig_width and orig_height else None

        formats = []
        for format_id, max_height in self._FORMATS.items():
            video_url = gif_data['urls'].get(format_id)
            if not video_url:
                continue
            # Without the original height the real height of a rendition
            # cannot be derived; leave it unset instead of failing in min()
            height = min(orig_height, max_height or orig_height) if orig_height else None
            width = round(height * aspect_ratio) if height and aspect_ratio else None
            formats.append({
                'url': video_url,
                'format_id': format_id,
                'width': width,
                'height': height,
                'quality': quality(format_id),
            })

        return {
            'id': video_id,
            'webpage_url': f'https://redgifs.com/watch/{video_id}',
            'extractor_key': RedGifsIE.ie_key(),
            'extractor': 'RedGifs',
            'title': ' '.join(gif_data.get('tags') or []) or 'RedGifs',
            'timestamp': int_or_none(gif_data.get('createDate')),
            'uploader': gif_data.get('userName'),
            'duration': int_or_none(gif_data.get('duration')),
            'view_count': int_or_none(gif_data.get('views')),
            'like_count': int_or_none(gif_data.get('likes')),
            'categories': gif_data.get('tags') or [],
            'tags': gif_data.get('tags'),
            'age_limit': 18,
            'formats': formats,
        }

    def _fetch_oauth_token(self, video_id):
        """Request a temporary token and store it in the shared API headers.

        Raises ExtractorError if the response carries no token.
        """
        # https://github.com/Redgifs/api/wiki/Temporary-tokens
        auth = self._download_json(
            'https://api.redgifs.com/v2/auth/temporary',
            video_id, note='Fetching temporary token')
        if not auth.get('token'):
            raise ExtractorError('Unable to get temporary token')
        self._API_HEADERS['authorization'] = f'Bearer {auth["token"]}'

    def _call_api(self, ep, video_id, **kwargs):
        """Call API endpoint `ep` and return the decoded JSON.

        A token is fetched first if none is stored. On an HTTP 401 the stored
        token is discarded and the request is retried exactly once.
        Extra keyword arguments are forwarded to _download_json.
        Raises ExtractorError if the response contains an 'error' key.
        """
        for first_attempt in True, False:
            if 'authorization' not in self._API_HEADERS:
                self._fetch_oauth_token(video_id)
            try:
                # Copy so the per-request header does not leak into the shared dict
                headers = dict(self._API_HEADERS)
                headers['x-customheader'] = f'https://www.redgifs.com/watch/{video_id}'
                data = self._download_json(
                    f'https://api.redgifs.com/v2/{ep}', video_id, headers=headers, **kwargs)
                break
            except ExtractorError as e:
                if first_attempt and isinstance(e.cause, HTTPError) and e.cause.status == 401:
                    del self._API_HEADERS['authorization']  # refresh the token
                    continue
                raise

        if 'error' in data:
            raise ExtractorError(f'RedGifs said: {data["error"]}', expected=True, video_id=video_id)
        return data

    def _fetch_page(self, ep, video_id, query, page):
        """Yield info dicts for one result page.

        @param page  zero-based page index; the API is queried with page + 1
        """
        page_num = page + 1
        # Build a fresh query per request rather than mutating the dict that
        # is shared between all page fetches
        data = self._call_api(
            ep, video_id, query={**query, 'page': page_num},
            note=f'Downloading JSON metadata page {page_num}')

        for entry in data['gifs']:
            yield self._parse_gif_data(entry)

    def _prepare_api_query(self, query, fields):
        """Pick `fields` out of a parse_qs()-style `query`.

        @param query   mapping of name -> sequence of values
        @param fields  mapping of field name -> default used when absent
        @returns       dict of the first value per field, dropping None values
        """
        api_query = {
            field_name: query.get(field_name, (default,))[0]
            for field_name, default in fields.items()}

        return {key: val for key, val in api_query.items() if val is not None}

    def _paged_entries(self, ep, item_id, query, fields):
        """Return entries for the page named in `query`, or a lazy list of all pages."""
        page = int_or_none(query.get('page', (None,))[0])
        page_fetcher = functools.partial(
            self._fetch_page, ep, item_id, self._prepare_api_query(query, fields))
        if page:
            # _fetch_page expects a zero-based index and adds 1 itself, so the
            # requested page has to be shifted down; passing it unchanged
            # fetched the page after the requested one.
            # NOTE(review): assumes the URL's `page` is 1-based like the API's - confirm
            return page_fetcher(page - 1)
        return OnDemandPagedList(page_fetcher, self._PAGE_SIZE)
class RedGifsIE(RedGifsBaseInfoExtractor):
    """Extractor for a single RedGifs item (watch pages and thumbs2 media URLs)."""

    _VALID_URL = r'https?://(?:(?:www\.)?redgifs\.com/watch/|thumbs2\.redgifs\.com/)(?P<id>[^-/?#\.]+)'
    _TESTS = [{
        'url': 'https://www.redgifs.com/watch/squeakyhelplesswisent',
        'info_dict': {
            'id': 'squeakyhelplesswisent',
            'ext': 'mp4',
            'title': 'Hotwife Legs Thick',
            'timestamp': 1636287915,
            'upload_date': '20211107',
            'uploader': 'ignored52',
            'duration': 16,
            'view_count': int,
            'like_count': int,
            'categories': list,
            'age_limit': 18,
            'tags': list,
        },
    }, {
        'url': 'https://thumbs2.redgifs.com/SqueakyHelplessWisent-mobile.mp4#t=0',
        'info_dict': {
            'id': 'squeakyhelplesswisent',
            'ext': 'mp4',
            'title': 'Hotwife Legs Thick',
            'timestamp': 1636287915,
            'upload_date': '20211107',
            'uploader': 'ignored52',
            'duration': 16,
            'view_count': int,
            'like_count': int,
            'categories': list,
            'age_limit': 18,
            'tags': list,
        },
    }]

    def _real_extract(self, url):
        """Look the item up through the API and return its info dict."""
        # The ID matched from the URL is lower-cased before it is sent to the API
        gif_id = self._match_id(url).lower()
        response = self._call_api(
            f'gifs/{gif_id}?views=yes', gif_id, note='Downloading video info')
        gif = response['gif']
        return self._parse_gif_data(gif)
class RedGifsSearchIE(RedGifsBaseInfoExtractor):
    """Playlist extractor for RedGifs tag searches (/browse?... URLs)."""

    IE_DESC = 'Redgifs search'
    _VALID_URL = r'https?://(?:www\.)?redgifs\.com/browse\?(?P<query>[^#]+)'
    _PAGE_SIZE = 80
    _TESTS = [
        {
            'url': 'https://www.redgifs.com/browse?tags=Lesbian',
            'info_dict': {
                'id': 'tags=Lesbian',
                'title': 'Lesbian',
                'description': 'RedGifs search for Lesbian, ordered by trending',
            },
            'playlist_mincount': 100,
        },
        {
            'url': 'https://www.redgifs.com/browse?type=g&order=latest&tags=Lesbian',
            'info_dict': {
                'id': 'type=g&order=latest&tags=Lesbian',
                'title': 'Lesbian',
                'description': 'RedGifs search for Lesbian, ordered by latest',
            },
            'playlist_mincount': 100,
        },
        {
            'url': 'https://www.redgifs.com/browse?type=g&order=latest&tags=Lesbian&page=2',
            'info_dict': {
                'id': 'type=g&order=latest&tags=Lesbian&page=2',
                'title': 'Lesbian',
                'description': 'RedGifs search for Lesbian, ordered by latest',
            },
            'playlist_count': 80,
        },
    ]

    def _real_extract(self, url):
        """Return a playlist of the search results described by the URL query.

        Raises ExtractorError if the query carries no 'tags' value.
        """
        raw_query = self._match_valid_url(url).group('query')
        params = urllib.parse.parse_qs(raw_query)

        tag_values = params.get('tags')
        if not tag_values:
            raise ExtractorError('Invalid query tags', expected=True)

        search_tags = tag_values[0]
        sort_order = params.get('order', ('trending',))[0]

        # The API takes the tags under 'search_text'
        params['search_text'] = [search_tags]
        api_fields = {
            'search_text': None,
            'order': 'trending',
            'type': None,
        }
        entries = self._paged_entries('gifs/search', raw_query, params, api_fields)

        description = f'RedGifs search for {search_tags}, ordered by {sort_order}'
        return self.playlist_result(entries, raw_query, search_tags, description)
class RedGifsUserIE(RedGifsBaseInfoExtractor):
    """Playlist extractor for a RedGifs user's uploads (/users/<name> URLs)."""

    IE_DESC = 'Redgifs user'
    _VALID_URL = r'https?://(?:www\.)?redgifs\.com/users/(?P<username>[^/?#]+)(?:\?(?P<query>[^#]+))?'
    _PAGE_SIZE = 80
    _TESTS = [
        {
            'url': 'https://www.redgifs.com/users/lamsinka89',
            'info_dict': {
                'id': 'lamsinka89',
                'title': 'lamsinka89',
                'description': 'RedGifs user lamsinka89, ordered by recent',
            },
            'playlist_mincount': 391,
        },
        {
            'url': 'https://www.redgifs.com/users/lamsinka89?page=3',
            'info_dict': {
                'id': 'lamsinka89?page=3',
                'title': 'lamsinka89',
                'description': 'RedGifs user lamsinka89, ordered by recent',
            },
            'playlist_count': 80,
        },
        {
            'url': 'https://www.redgifs.com/users/lamsinka89?order=best&type=g',
            'info_dict': {
                'id': 'lamsinka89?order=best&type=g',
                'title': 'lamsinka89',
                'description': 'RedGifs user lamsinka89, ordered by best',
            },
            'playlist_mincount': 391,
        },
        {
            'url': 'https://www.redgifs.com/users/ignored52',
            'note': 'https://github.com/yt-dlp/yt-dlp/issues/7382',
            'info_dict': {
                'id': 'ignored52',
                'title': 'ignored52',
                'description': 'RedGifs user ignored52, ordered by recent',
            },
            'playlist_mincount': 121,
        },
    ]

    def _real_extract(self, url):
        """Return a playlist of the user's items, honouring the URL query if present."""
        match = self._match_valid_url(url)
        username = match.group('username')
        query_str = match.group('query')

        # The playlist ID keeps the query string so differently ordered or
        # paged views of the same user get distinct IDs
        if query_str:
            playlist_id = f'{username}?{query_str}'
        else:
            playlist_id = username

        params = urllib.parse.parse_qs(query_str)
        sort_order = params.get('order', ('recent',))[0]

        api_fields = {
            'order': 'recent',
            'type': None,
        }
        entries = self._paged_entries(
            f'users/{username}/search', playlist_id, params, api_fields)

        description = f'RedGifs user {username}, ordered by {sort_order}'
        return self.playlist_result(entries, playlist_id, username, description)