[cleanup] Make more playlist entries lazy (#11763)
[yt-dlp.git] / yt_dlp / extractor / kinja.py
blob99c8a1224780379308e2da6cd58d17539282434a
1 import urllib.parse
3 from .common import InfoExtractor
4 from ..utils import (
5 int_or_none,
6 parse_iso8601,
7 strip_or_none,
8 try_get,
class KinjaEmbedIE(InfoExtractor):
    """Extractor for Kinja-network iframe embed URLs.

    A matched URL carries an ``id`` query value of the form ``<type>-<id>``.
    Types listed in ``_PROVIDER_MAP`` are rewritten into a URL for another
    extractor; ``kinjavideo`` and the remaining type (``mcp``) are resolved
    by querying JSON APIs directly.
    """

    IE_NAME = 'kinja:embed'
    # The two fragments below are only interpolated into patterns that start
    # with (?x) in this class, so their embedded newlines and indentation are
    # ignored by the regex engine.
    _DOMAIN_REGEX = r'''(?:[^.]+\.)?
        (?:
            avclub|
            clickhole|
            deadspin|
            gizmodo|
            jalopnik|
            jezebel|
            kinja|
            kotaku|
            lifehacker|
            splinternews|
            the(?:inventory|onion|root|takeout)
        )\.com'''
    _COMMON_REGEX = r'''/
        (?:
            ajax/inset|
            embed/video
        )/iframe\?.*?\bid='''
    _VALID_URL = rf'''(?x)https?://{_DOMAIN_REGEX}{_COMMON_REGEX}
        (?P<type>
            fb|
            imgur|
            instagram|
            jwp(?:layer)?-video|
            kinjavideo|
            mcp|
            megaphone|
            soundcloud(?:-playlist)?|
            tumblr-post|
            twitch-stream|
            twitter|
            ustream-channel|
            vimeo|
            vine|
            youtube-(?:list|video)
        )-(?P<id>[^&]+)'''
    _EMBED_REGEX = [rf'(?x)<iframe[^>]+?src=(?P<q>["\'])(?P<url>(?:(?:https?:)?//{_DOMAIN_REGEX})?{_COMMON_REGEX}(?:(?!\1).)+)\1']
    _TESTS = [{
        'url': 'https://kinja.com/ajax/inset/iframe?id=fb-10103303356633621',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=kinjavideo-100313',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=megaphone-PPY1300931075',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-128574047',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-playlist-317413750',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=tumblr-post-160130699814-daydreams-at-midnight',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=twitch-stream-libratus_extra',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=twitter-1068875942473404422',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=ustream-channel-10414700',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=vimeo-120153502',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=vine-5BlvV5qqPrD',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-list-BCQ3KyrPjgA/PLE6509247C270A72E',
        'only_matching': True,
    }, {
        'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-video-00QyL0AgPAE',
        'only_matching': True,
    }]
    # Each provider is a (scheme-less URL prefix or %-template, extractor key)
    # pair; both jwplayer spellings share the same entry.
    _JWPLATFORM_PROVIDER = ('cdn.jwplayer.com/v2/media/', 'JWPlatform')
    _PROVIDER_MAP = {
        'fb': ('facebook.com/video.php?v=', 'Facebook'),
        'imgur': ('imgur.com/', 'Imgur'),
        'instagram': ('instagram.com/p/', 'Instagram'),
        'jwplayer-video': _JWPLATFORM_PROVIDER,
        'jwp-video': _JWPLATFORM_PROVIDER,
        'megaphone': ('player.megaphone.fm/', 'Generic'),
        'soundcloud': ('api.soundcloud.com/tracks/', 'Soundcloud'),
        'soundcloud-playlist': ('api.soundcloud.com/playlists/', 'SoundcloudPlaylist'),
        'tumblr-post': ('%s.tumblr.com/post/%s', 'Tumblr'),
        'twitch-stream': ('twitch.tv/', 'TwitchStream'),
        'twitter': ('twitter.com/i/cards/tfw/v1/', 'TwitterCard'),
        'ustream-channel': ('ustream.tv/embed/', 'Ustream'),
        'vimeo': ('vimeo.com/', 'Vimeo'),
        'vine': ('vine.co/v/', 'Vine'),
        'youtube-list': ('youtube.com/embed/%s?list=%s', 'YoutubePlaylist'),
        'youtube-video': ('youtube.com/embed/', 'Youtube'),
    }

    def _real_extract(self, url):
        """Resolve a Kinja embed URL.

        Returns a ``url_result`` pointing at another extractor when the embed
        type has an entry in ``_PROVIDER_MAP``; otherwise returns a complete
        info dict built from the Kinja or Univision JSON APIs.
        """
        video_type, video_id = self._match_valid_url(url).groups()

        provider = self._PROVIDER_MAP.get(video_type)
        if provider:
            url_template, ie_key = provider
            # Only the delegated branch percent-decodes the id.
            decoded_id = urllib.parse.unquote(video_id)
            if video_type == 'tumblr-post':
                # id is "<post id>-<blog>"; the template wants (blog, post id)
                post_id, blog = decoded_id.split('-', 1)
                target = url_template % (blog, post_id)
            elif video_type == 'youtube-list':
                # id is "<video id>/<playlist id>"
                clip_id, playlist_id = decoded_id.split('/')
                target = url_template % (clip_id, playlist_id)
            else:
                target = url_template + decoded_id
            return self.url_result('http://' + target, ie_key)

        if video_type == 'kinjavideo':
            return self._kinjavideo_info(video_id)
        # Of the types accepted by _VALID_URL, only 'mcp' has neither a
        # provider entry nor the dedicated branch above.
        return self._mcp_info(video_id)

    def _kinjavideo_info(self, video_id):
        """Build the info dict for a ``kinjavideo`` embed from the videoById API."""
        data = self._download_json(
            'https://kinja.com/api/core/video/views/videoById',
            video_id, query={'videoId': video_id})['data']
        title = data['title']

        formats = []
        for prefix in ('signedPlaylist', 'streaming'):
            m3u8_url = data.get(prefix + 'Url')
            if not m3u8_url:
                continue
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False))

        # A thumbnail URL is only produced when the poster carries an id;
        # the extension falls back to 'jpg'.
        poster = data.get('poster') or {}
        poster_id = poster.get('id')
        thumbnail = (
            'https://i.kinja-img.com/gawker-media/image/upload/{}.{}'.format(poster_id, poster.get('format') or 'jpg')
            if poster_id else None)

        # NOTE(review): the 1000 is presumably int_or_none's scale argument
        # (milliseconds -> seconds) - confirm against its signature.
        timestamp = int_or_none(try_get(
            data, lambda x: x['postInfo']['publishTimeMillis']), 1000)

        return {
            'id': video_id,
            'title': title,
            'description': strip_or_none(data.get('description')),
            'formats': formats,
            'tags': data.get('tags'),
            'timestamp': timestamp,
            'thumbnail': thumbnail,
            'uploader': data.get('network'),
        }

    def _mcp_info(self, video_id):
        """Build the info dict for an embed resolved through the Univision APIs."""
        video_data = self._download_json(
            'https://api.vmh.univision.com/metadata/v1/content/' + video_id,
            video_id)['videoMetadata']
        iptc = video_data['photoVideoMetadataIPTC']
        title = iptc['title']['en']
        fmg = video_data.get('photoVideoMetadata_fmg') or {}
        auth_host = fmg.get('tvssDomain') or 'https://auth.univision.com'
        token_data = self._download_json(
            auth_host + '/api/v3/video-auth/url-signature-tokens',
            video_id, query={'mcpids': video_id})['data'][0]

        rendition_url = token_data.get('renditionUrl')
        formats = self._extract_m3u8_formats(
            rendition_url, video_id, 'mp4',
            'm3u8_native', m3u8_id='hls', fatal=False) if rendition_url else []

        fallback_url = token_data.get('fallbackRenditionUrl')
        if fallback_url:
            # Bitrate is read from a trailing "_<digits>.mp4" in the URL, if any.
            fallback_tbr = int_or_none(self._search_regex(
                r'_(\d+)\.mp4', fallback_url,
                'bitrate', default=None))
            formats.append({
                'format_id': 'fallback',
                'tbr': fallback_tbr,
                'url': fallback_url,
            })

        return {
            'id': video_id,
            'title': title,
            'thumbnail': try_get(iptc, lambda x: x['cloudinaryLink']['link'], str),
            'uploader': fmg.get('network'),
            'duration': int_or_none(iptc.get('fileDuration')),
            'formats': formats,
            'description': try_get(iptc, lambda x: x['description']['en'], str),
            'timestamp': parse_iso8601(iptc.get('dateReleased')),
        }