7 from .common
import InfoExtractor
11 from ..networking
import HEADRequest
class GloboIE(InfoExtractor):
    """Extract a single Globo video addressed by its numeric id.

    Accepts ``globo:<id>`` pseudo-URLs as well as ``*.globo.com`` page URLs
    whose path ends in ``v/[<slug>/]<id>`` or ``videos/<id>``, where the id is
    a run of at least seven digits.
    """

    _VALID_URL = r'(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P<id>\d{7,})'
    _NETRC_MACHINE = 'globo'
    # NOTE(review): only the expectations listed below were legible in the
    # reviewed listing; confirm whether further info_dict fields (id, ext,
    # duration, ...) belong to these test cases.
    _TESTS = [{
        'url': 'http://g1.globo.com/carros/autoesporte/videos/t/exclusivos-do-g1/v/mercedes-benz-gla-passa-por-teste-de-colisao-na-europa/3607726/',
        'info_dict': {
            'title': 'Mercedes-Benz GLA passa por teste de colisão na Europa',
            'uploader_id': '2015',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://globoplay.globo.com/v/4581987/',
        'info_dict': {
            'title': 'Acidentes de trânsito estão entre as maiores causas de queda de energia em SP',
            'uploader': 'Rede Globo',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://canalbrasil.globo.com/programas/sangue-latino/videos/3928201.html',
        'only_matching': True,
    }, {
        'url': 'http://globosatplay.globo.com/globonews/v/4472924/',
        'only_matching': True,
    }, {
        'url': 'http://globotv.globo.com/t/programa/v/clipe-sexo-e-as-negas-adeus/3836166/',
        'only_matching': True,
    }, {
        'url': 'http://globotv.globo.com/canal-brasil/sangue-latino/t/todos-os-videos/v/ator-e-diretor-argentino-ricado-darin-fala-sobre-utopias-e-suas-perdas/3928201/',
        'only_matching': True,
    }, {
        'url': 'http://canaloff.globo.com/programas/desejar-profundo/videos/4518560.html',
        'only_matching': True,
    }, {
        'url': 'globo:3607726',
        'only_matching': True,
    }, {
        'url': 'https://globoplay.globo.com/v/10248083/',
        'info_dict': {
            'title': 'Melhores momentos: Equador 1 x 1 Brasil pelas Eliminatórias da Copa do Mundo 2022',
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        """Resolve *url* to an info dict with HLS formats and subtitles.

        Steps: fetch the playlist metadata for the video id, open a playback
        session to obtain a token, derive a signed hash from that token, and
        request the HLS manifest with the signed URL.

        Raises:
            ExtractorError: if the session response carries no token, or the
                token starts with a two-character code this method does not
                know how to sign.
        """
        video_id = self._match_id(url)

        # The response body is not used; the request is issued only for its
        # side effects (the progress note says 'Getting cookies').
        self._request_webpage(
            HEADRequest('https://globo-ab.globo.com/v2/selected-alternatives?experiments=player-isolated-experiment-02&skipImpressions=true'),
            video_id, 'Getting cookies')

        video = self._download_json(
            'http://api.globovideos.com/videos/%s/playlist' % video_id,
            video_id)['videos'][0]
        if not self.get_param('allow_unplayable_formats') and video.get('encrypted') is True:
            self.report_drm(video_id)

        title = video['title']

        # NOTE(review): the "quality" and "tz" fields and the trailing
        # .encode() were not legible in the reviewed listing and are restored
        # from the file's expected layout -- confirm against the repository.
        security = self._download_json(
            'https://playback.video.globo.com/v2/video-session', video_id, 'Downloading security hash for %s' % video_id,
            headers={'content-type': 'application/json'}, data=json.dumps({
                "player_type": "desktop",
                "video_id": video_id,
                "quality": "max",
                "content_protection": "widevine",
                "vsid": "581b986b-4c40-71f0-5a58-803e579d5fa2",
                "tz": "-3.0:00",
            }).encode())

        session_source = security['sources'][0]

        self._request_webpage(
            HEADRequest(session_source['url_template']), video_id, 'Getting locksession cookie')

        security_hash = session_source['token']
        if not security_hash:
            # Always abort here: continuing with an empty token would only
            # fail later with an unrelated TypeError/UnboundLocalError.
            message = security.get('message') or 'no security token in video session response'
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, message), expected=True)

        hash_code = security_hash[:2]
        # random.randint() is inclusive on both ends; cap at the largest
        # 10-digit number so the zero-padded field is always exactly 10 chars.
        padding = '%010d' % random.randint(1, 9999999999)
        if hash_code in ('04', '14'):
            received_time = security_hash[3:13]
            received_md5 = security_hash[24:]
            hash_prefix = security_hash[:23]
        elif hash_code in ('02', '12', '03', '13'):
            received_time = security_hash[2:12]
            received_md5 = security_hash[22:]
            # NOTE(review): this statement was not legible in the reviewed
            # listing and is restored from the expected layout -- confirm.
            padding += '1'
            hash_prefix = '05' + security_hash[:22]
        else:
            # Without this branch the names used below would be unbound.
            raise ExtractorError(
                '%s returned unsupported security hash type %r' % (self.IE_NAME, hash_code))

        # The signed hash is: prefix + (token time + 86400, then the random
        # padding) + url-safe base64 of md5(token md5 part + that time/padding
        # string + a fixed suffix), with base64 '=' padding stripped.
        padded_sign_time = compat_str(int(received_time) + 86400) + padding
        md5_data = (received_md5 + padded_sign_time + '0xAC10FD').encode()
        signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=')
        signed_hash = hash_prefix + padded_sign_time + signed_md5

        source = session_source['url_parts']
        resource_url = source['scheme'] + '://' + source['domain'] + source['path']
        signed_url = '%s?h=%s&k=html5&a=%s' % (
            resource_url, signed_hash, 'F' if video.get('subscriber_only') else 'A')

        # fatal=False: a failed manifest download yields empty results
        # instead of aborting the extraction.
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            signed_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)

        # Subtitles listed among the playlist resources ('por' when the
        # resource names no language). A missing 'resources' key is tolerated.
        for resource in video.get('resources') or []:
            if resource.get('type') == 'subtitle':
                subtitles.setdefault(resource.get('language') or 'por', []).append({
                    'url': resource.get('url'),
                })

        # Subtitles advertised by the session response, in both flavours.
        # NOTE(review): these are read from security['source'] (singular)
        # while the token comes from security['sources'][0] -- confirm which
        # key the API actually uses.
        for subs_key in ('subtitles', 'subtitles_webvtt'):
            subs = try_get(
                security, lambda x, key=subs_key: x['source'][key], expected_type=dict) or {}
            for sub_lang, sub_url in subs.items():
                if sub_url:
                    subtitles.setdefault(sub_lang or 'por', []).append({
                        'url': sub_url,
                    })

        return {
            'id': video_id,
            'title': title,
            # The API value is divided by 1000 to obtain the reported duration.
            'duration': float_or_none(video.get('duration'), 1000),
            'uploader': video.get('channel'),
            'uploader_id': str_or_none(video.get('channel_id')),
            'formats': formats,
            'subtitles': subtitles,
        }
class GloboArticleIE(InfoExtractor):
    """Turn a ``*.globo.com`` article page into a playlist of its videos.

    The page HTML is scanned for embedded numeric video ids; every distinct
    id becomes a ``globo:<id>`` entry that is delegated to ``GloboIE``.
    """

    _VALID_URL = r'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/.]+)(?:\.html)?'

    # Patterns locating embedded video ids in the article markup; each has
    # exactly one capture group holding the numeric id.
    _VIDEOID_REGEXES = [
        r'\bdata-video-id=["\'](\d{7,})["\']',
        r'\bdata-player-videosids=["\'](\d{7,})["\']',
        r'\bvideosIDs\s*:\s*["\']?(\d{7,})',
        r'\bdata-id=["\'](\d{7,})["\']',
        r'<div[^>]+\bid=["\'](\d{7,})["\']',
        r'<bs-player[^>]+\bvideoid=["\'](\d{8,})["\']',
    ]

    # NOTE(review): only the expectations listed below were legible in the
    # reviewed listing; confirm whether further keys (e.g. playlist counts)
    # belong to these test cases.
    _TESTS = [{
        'url': 'http://g1.globo.com/jornal-nacional/noticia/2014/09/novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes.html',
        'info_dict': {
            'id': 'novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes',
            'title': 'Novidade na fiscalização de bagagem pela Receita provoca discussões',
            'description': 'md5:c3c4b4d4c30c32fce460040b1ac46b12',
        },
    }, {
        'url': 'http://g1.globo.com/pr/parana/noticia/2016/09/mpf-denuncia-lula-marisa-e-mais-seis-na-operacao-lava-jato.html',
        'info_dict': {
            'id': 'mpf-denuncia-lula-marisa-e-mais-seis-na-operacao-lava-jato',
            'title': "Lula era o 'comandante máximo' do esquema da Lava Jato, diz MPF",
            'description': 'md5:8aa7cc8beda4dc71cc8553e00b77c54c',
        },
    }, {
        'url': 'http://gq.globo.com/Prazeres/Poder/noticia/2015/10/all-o-desafio-assista-ao-segundo-capitulo-da-serie.html',
        'only_matching': True,
    }, {
        'url': 'http://gshow.globo.com/programas/tv-xuxa/O-Programa/noticia/2014/01/xuxa-e-junno-namoram-muuuito-em-luau-de-zeze-di-camargo-e-luciano.html',
        'only_matching': True,
    }, {
        'url': 'http://oglobo.globo.com/rio/a-amizade-entre-um-entregador-de-farmacia-um-piano-19946271',
        'only_matching': True,
    }, {
        'url': 'https://ge.globo.com/video/ta-na-area-como-foi-assistir-ao-jogo-do-palmeiras-que-a-globo-nao-passou-10287094.ghtml',
        'info_dict': {
            'id': 'ta-na-area-como-foi-assistir-ao-jogo-do-palmeiras-que-a-globo-nao-passou-10287094',
            'title': 'Tá na Área: como foi assistir ao jogo do Palmeiras que a Globo não passou',
            'description': 'md5:2d089d036c4c9675117d3a56f8c61739',
        },
    }, {
        'url': 'https://redeglobo.globo.com/rpc/meuparana/noticia/a-producao-de-chocolates-no-parana.ghtml',
        'info_dict': {
            'id': 'a-producao-de-chocolates-no-parana',
            'title': 'A produção de chocolates no Paraná',
            'description': 'md5:f2e3daf00ffd1dc0e9a8a6c7cfb0a89e',
        },
    }]

    @classmethod
    def suitable(cls, url):
        """Return False for URLs that GloboIE accepts; otherwise defer to the base check."""
        if GloboIE.suitable(url):
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Download the article and return a playlist of its embedded videos."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # Collect ids pattern by pattern, in page order within each pattern.
        found_ids = [
            found
            for pattern in self._VIDEOID_REGEXES
            for found in re.findall(pattern, webpage)
        ]
        # orderedSet() is applied so that each id yields a single entry.
        entries = [
            self.url_result('globo:%s' % video_id, GloboIE.ie_key())
            for video_id in orderedSet(found_ids)
        ]

        return self.playlist_result(
            entries,
            display_id,
            self._og_search_title(webpage).strip(),
            self._html_search_meta('description', webpage))