[ie/facebook] Support more groups URLs (#11576)
[yt-dlp3.git] / yt_dlp / extractor / ninegag.py
blob2979f3a50e79daf568340a57436f4a07002b4d90
1 from .common import InfoExtractor
2 from ..utils import (
3 ExtractorError,
4 determine_ext,
5 int_or_none,
6 traverse_obj,
7 unescapeHTML,
8 url_or_none,
class NineGagIE(InfoExtractor):
    """Extractor for animated (video) posts on 9gag.com/gag/<id> pages."""

    IE_NAME = '9gag'
    IE_DESC = '9GAG'
    _VALID_URL = r'https?://(?:www\.)?9gag\.com/gag/(?P<id>[^/?&#]+)'

    _TESTS = [{
        'url': 'https://9gag.com/gag/ae5Ag7B',
        'info_dict': {
            'id': 'ae5Ag7B',
            'ext': 'webm',
            'title': 'Capybara Agility Training',
            'upload_date': '20191108',
            'timestamp': 1573237208,
            'thumbnail': 'https://img-9gag-fun.9cache.com/photo/ae5Ag7B_460s.jpg',
            'categories': ['Awesome'],
            'tags': ['Awesome'],
            'duration': 44,
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
        },
    }, {
        # HTML escaped title
        'url': 'https://9gag.com/gag/av5nvyb',
        'only_matching': True,
    }, {
        # Non Anonymous Uploader
        'url': 'https://9gag.com/gag/ajgp66G',
        'info_dict': {
            'id': 'ajgp66G',
            'ext': 'webm',
            'title': 'Master Shifu! Or Splinter! You decide:',
            'upload_date': '20220806',
            'timestamp': 1659803411,
            'thumbnail': 'https://img-9gag-fun.9cache.com/photo/ajgp66G_460s.jpg',
            'categories': ['Funny'],
            'tags': ['Funny'],
            'duration': 26,
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'uploader': 'Peter Klaus',
            'uploader_id': 'peterklaus12',
            'uploader_url': 'https://9gag.com/u/peterklaus12',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for a single 9GAG post.

        Fetches the post JSON from ``https://9gag.com/v1/post`` and walks
        its ``images`` mapping: jpg/png entries become thumbnails, and
        webm/mp4 entries become formats. Alternate URLs found on an entry
        (``webpUrl``, ``vp8Url``, ``vp9Url``, ``h265Url``) are added as
        extra thumbnails/formats.

        Raises:
            ExtractorError: (expected) if the post's ``type`` is not
                ``'Animated'``.
        """
        post_id = self._match_id(url)
        post = self._download_json(
            'https://9gag.com/v1/post', post_id, query={
                'id': post_id,
            })['data']['post']

        if post.get('type') != 'Animated':
            raise ExtractorError(
                'The given url does not contain a video',
                expected=True)

        duration = None
        formats = []
        thumbnails = []
        for key, image in (post.get('images') or {}).items():
            image_url = url_or_none(image.get('url'))
            if not image_url:
                continue
            ext = determine_ext(image_url)
            # Drop only the literal 'image' prefix from the key. The former
            # key.strip('image') treated its argument as a character set and
            # also ate trailing 'i'/'m'/'a'/'g'/'e' characters, so a key like
            # 'image460svwm' produced the id '460svw'.
            image_id = key[len('image'):] if key.startswith('image') else key
            common = {
                'url': image_url,
                'width': int_or_none(image.get('width')),
                'height': int_or_none(image.get('height')),
            }
            if ext in ('jpg', 'png'):
                webp_url = image.get('webpUrl')
                if webp_url:
                    # WebP variant shares the dimensions of the base image
                    t = common.copy()
                    t.update({
                        'id': image_id + '-webp',
                        'url': webp_url,
                    })
                    thumbnails.append(t)
                common.update({
                    'id': image_id,
                    'ext': ext,
                })
                thumbnails.append(common)
            elif ext in ('webm', 'mp4'):
                # Keep the first non-empty duration seen across the entries
                if not duration:
                    duration = int_or_none(image.get('duration'))
                common['acodec'] = 'none' if image.get('hasAudio') == 0 else None
                for vcodec in ('vp8', 'vp9', 'h265'):
                    c_url = image.get(vcodec + 'Url')
                    if not c_url:
                        continue
                    c_f = common.copy()
                    c_f.update({
                        'format_id': image_id + '-' + vcodec,
                        'url': c_url,
                        'vcodec': vcodec,
                    })
                    formats.append(c_f)
                common.update({
                    'ext': ext,
                    'format_id': image_id,
                })
                formats.append(common)

        section = traverse_obj(post, ('postSection', 'name'))

        # tags stays None when the post has no tag list at all; entries
        # without a 'key' are skipped.
        tags = None
        post_tags = post.get('tags')
        if post_tags:
            tags = [tag['key'] for tag in post_tags if tag.get('key')]

        return {
            'id': post_id,
            'title': unescapeHTML(post.get('title')),
            'timestamp': int_or_none(post.get('creationTs')),
            'duration': duration,
            'uploader': traverse_obj(post, ('creator', 'fullName')),
            'uploader_id': traverse_obj(post, ('creator', 'username')),
            'uploader_url': url_or_none(traverse_obj(post, ('creator', 'profileUrl'))),
            'formats': formats,
            'thumbnails': thumbnails,
            'like_count': int_or_none(post.get('upVoteCount')),
            'dislike_count': int_or_none(post.get('downVoteCount')),
            'comment_count': int_or_none(post.get('commentsCount')),
            'age_limit': 18 if post.get('nsfw') == 1 else None,
            'categories': [section] if section else None,
            'tags': tags,
        }