[ie/box] Fix formats extraction (#8649)
[yt-dlp3.git] / yt_dlp / extractor / gfycat.py
blobedc2e56e447328831936c568ff1e8e2d7a580e35
1 from .common import InfoExtractor
2 from ..utils import (
3 int_or_none,
4 float_or_none,
5 qualities,
6 ExtractorError,
class GfycatIE(InfoExtractor):
    """Extractor for gfycat.com clips, backed by the api.gfycat.com v1 endpoint."""

    # Accepts the bare domain or the www/giant/thumbs subdomains, an optional
    # case-insensitive ru/, ifr/ or gifs/detail/ path prefix, and captures the
    # id up to the first '-', '/', '?', '#', '.' or quote character.
    _VALID_URL = r'https?://(?:(?:www|giant|thumbs)\.)?gfycat\.com/(?i:ru/|ifr/|gifs/detail/)?(?P<id>[^-/?#\."\']+)'
    # Same URL pattern when it appears as the src of an <iframe> or <source> tag.
    _EMBED_REGEX = [rf'<(?:iframe|source)[^>]+\bsrc=["\'](?P<url>{_VALID_URL})']
    _TESTS = [{
        'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher',
        'info_dict': {
            'id': 'DeadlyDecisiveGermanpinscher',
            'ext': 'mp4',
            'title': 'Ghost in the Shell',
            'timestamp': 1410656006,
            'upload_date': '20140914',
            'uploader': 'anonymous',
            'duration': 10.4,
            'view_count': int,
            'like_count': int,
            'categories': list,
            'age_limit': 0,
            'uploader_id': 'anonymous',
            'description': '',
        },
    }, {
        'url': 'http://gfycat.com/ifr/JauntyTimelyAmazontreeboa',
        'info_dict': {
            'id': 'JauntyTimelyAmazontreeboa',
            'ext': 'mp4',
            'title': 'JauntyTimelyAmazontreeboa',
            'timestamp': 1411720126,
            'upload_date': '20140926',
            'uploader': 'anonymous',
            'duration': 3.52,
            'view_count': int,
            'like_count': int,
            'categories': list,
            'age_limit': 0,
            'uploader_id': 'anonymous',
            'description': '',
        },
    }, {
        'url': 'https://gfycat.com/alienatedsolidgreathornedowl',
        'info_dict': {
            'id': 'alienatedsolidgreathornedowl',
            'ext': 'mp4',
            'upload_date': '20211226',
            'uploader_id': 'reactions',
            'timestamp': 1640536930,
            'like_count': int,
            'description': '',
            'title': 'Ingrid Michaelson, Zooey Deschanel - Merry Christmas Happy New Year',
            'categories': list,
            'age_limit': 0,
            'duration': 2.9583333333333335,
            'uploader': 'Reaction GIFs',
            'view_count': int,
        },
    }, {
        'url': 'https://gfycat.com/ru/RemarkableDrearyAmurstarfish',
        'only_matching': True,
    }, {
        'url': 'https://gfycat.com/gifs/detail/UnconsciousLankyIvorygull',
        'only_matching': True,
    }, {
        'url': 'https://gfycat.com/acceptablehappygoluckyharborporpoise-baseball',
        'only_matching': True,
    }, {
        'url': 'https://thumbs.gfycat.com/acceptablehappygoluckyharborporpoise-size_restricted.gif',
        'only_matching': True,
    }, {
        'url': 'https://giant.gfycat.com/acceptablehappygoluckyharborporpoise.mp4',
        'only_matching': True,
    }, {
        'url': 'http://gfycat.com/IFR/JauntyTimelyAmazontreeboa',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the clip addressed by *url*.

        The id captured by ``_VALID_URL`` is looked up through the JSON API
        and the fields of the returned ``gfyItem`` object are mapped onto
        the result.

        Raises:
            ExtractorError: (expected) when the API response has an
                ``error`` key; its value is appended to the message.
        """
        video_id = self._match_id(url)

        response = self._download_json(
            f'https://api.gfycat.com/v1/gfycats/{video_id}',
            video_id, 'Downloading video info')
        if 'error' in response:
            raise ExtractorError('Gfycat said: ' + response['error'], expected=True)
        item = response['gfyItem']

        # gfyName is the mandatory fallback when there is no usable title.
        title = item.get('title') or item['gfyName']
        uploader_id = item.get('userName') or item.get('username')

        # Duration is frames / frame rate; left unset unless both are non-zero.
        fps = int_or_none(item.get('frameRate'))
        frame_count = int_or_none(item.get('numFrames'))
        if frame_count and fps:
            duration = float_or_none(frame_count, fps)
        else:
            duration = None

        width = int_or_none(item.get('width'))
        height = int_or_none(item.get('height'))

        # Every container shares the same dimensions and frame rate; the
        # tuple is also handed to qualities() to rank the formats.
        format_ids = ('gif', 'webm', 'mp4')
        rank = qualities(format_ids)
        formats = [{
            'url': item[f'{fmt}Url'],
            'format_id': fmt,
            'width': width,
            'height': height,
            'fps': fps,
            'filesize': int_or_none(item.get(f'{fmt}Size')),
            'quality': rank(fmt),
        } for fmt in format_ids if item.get(f'{fmt}Url')]

        return {
            'id': video_id,
            'title': title,
            'description': item.get('description'),
            'timestamp': int_or_none(item.get('createDate')),
            'uploader': item.get('userDisplayName') or uploader_id,
            'uploader_id': uploader_id,
            'duration': duration,
            'view_count': int_or_none(item.get('views')),
            'like_count': int_or_none(item.get('likes')),
            'dislike_count': int_or_none(item.get('dislikes')),
            'categories': item.get('tags') or item.get('extraLemmas') or [],
            # Only the exact string '1' marks the clip as adult content.
            'age_limit': 18 if item.get('nsfw') == '1' else 0,
            'formats': formats,
        }