import re

from .common import InfoExtractor
12 class ArcPublishingIE(InfoExtractor
):
# Lower-case hexadecimal UUID in the 8-4-4-4-12 layout (upper-case hex digits
# are not accepted by this pattern).
_UUID_REGEX = r'[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12}'
# Pseudo-URL of the form ``arcpublishing:<org>:<uuid>``: ``org`` is one or more
# lower-case ASCII letters and ``id`` is a UUID as described by _UUID_REGEX.
_VALID_URL = r'arcpublishing:(?P<org>[a-z]+):(?P<id>%s)' % _UUID_REGEX
# URL-matching fixtures: each entry pairs an ``arcpublishing:<org>:<uuid>``
# pseudo-URL with ``only_matching`` and a comment naming the public page the
# video was taken from.
# NOTE(review): the list header and entry separators were missing from the
# text this block was rebuilt from; the name `_TESTS` is assumed from the
# extractor-framework convention - confirm against the base class.
_TESTS = [{
    # https://www.adn.com/politics/2020/11/02/video-senate-candidates-campaign-in-anchorage-on-eve-of-election-day/
    'url': 'arcpublishing:adn:8c99cb6e-b29c-4bc9-9173-7bf9979225ab',
    'only_matching': True,
}, {
    # https://www.bostonglobe.com/video/2020/12/30/metro/footage-released-showing-officer-talking-about-striking-protesters-with-car/
    'url': 'arcpublishing:bostonglobe:232b7ae6-7d73-432d-bc0a-85dbf0119ab1',
    'only_matching': True,
}, {
    # https://www.actionnewsjax.com/video/live-stream/
    'url': 'arcpublishing:cmg:cfb1cf1b-3ab5-4d1b-86c5-a5515d311f2a',
    'only_matching': True,
}, {
    # https://elcomercio.pe/videos/deportes/deporte-total-futbol-peruano-seleccion-peruana-la-valorizacion-de-los-peruanos-en-el-exterior-tras-un-2020-atipico-nnav-vr-video-noticia/
    'url': 'arcpublishing:elcomercio:27a7e1f8-2ec7-4177-874f-a4feed2885b3',
    'only_matching': True,
}, {
    # https://www.clickondetroit.com/video/community/2020/05/15/events-surrounding-woodward-dream-cruise-being-canceled/
    'url': 'arcpublishing:gmg:c8793fb2-8d44-4242-881e-2db31da2d9fe',
    'only_matching': True,
}, {
    # https://www.wabi.tv/video/2020/12/30/trenton-company-making-equipment-pfizer-covid-vaccine/
    'url': 'arcpublishing:gray:0b0ba30e-032a-4598-8810-901d70e6033e',
    'only_matching': True,
}, {
    # https://www.lateja.cr/el-mundo/video-china-aprueba-con-condiciones-su-primera/dfcbfa57-527f-45ff-a69b-35fe71054143/video/
    'url': 'arcpublishing:gruponacion:dfcbfa57-527f-45ff-a69b-35fe71054143',
    'only_matching': True,
}, {
    # https://www.fifthdomain.com/video/2018/03/09/is-america-vulnerable-to-a-cyber-attack/
    'url': 'arcpublishing:mco:aa0ca6fe-1127-46d4-b32c-be0d6fdb8055',
    'only_matching': True,
}, {
    # https://www.vl.no/kultur/2020/12/09/en-melding-fra-en-lytter-endret-julelista-til-lewi-bergrud/
    'url': 'arcpublishing:mentormedier:47a12084-650b-4011-bfd0-3699b6947b2d',
    'only_matching': True,
}, {
    # https://www.14news.com/2020/12/30/whiskey-theft-caught-camera-henderson-liquor-store/
    'url': 'arcpublishing:raycom:b89f61f8-79fa-4c09-8255-e64237119bf7',
    'only_matching': True,
}, {
    # https://www.theglobeandmail.com/world/video-ethiopian-woman-who-became-symbol-of-integration-in-italy-killed-on/
    'url': 'arcpublishing:tgam:411b34c1-8701-4036-9831-26964711664b',
    'only_matching': True,
}, {
    # https://www.pilotonline.com/460f2931-8130-4719-8ea1-ffcb2d7cb685-132.html
    'url': 'arcpublishing:tronc:460f2931-8130-4719-8ea1-ffcb2d7cb685',
    'only_matching': True,
}]
# Ordered ``(organisations, API host template)`` pairs. The extraction code
# iterates these as ``for orgs, tmpl in ...`` and fills the template's single
# ``%s`` with the organisation id to build the API base.
_POWA_DEFAULTS = [
    (['cmg', 'prisa'], '%s-config-prod.api.cdn.arcpublishing.com/video'),
    ([
        'adn', 'advancelocal', 'answers', 'bonnier', 'bostonglobe', 'demo',
        'gmg', 'gruponacion', 'infobae', 'mco', 'nzme', 'pmn', 'raycom',
        'spectator', 'tbt', 'tgam', 'tronc', 'wapo', 'wweek',
    ], 'video-api-cdn.%s.arcpublishing.com/api'),
]
@classmethod
def _extract_embed_urls(cls, url, webpage):
    """Collect Arc Publishing pseudo-URLs for the POWA players found in a page.

    Scans *webpage* for ``<div>`` opening tags whose ``class`` contains the
    word ``powa`` and whose ``data-uuid`` attribute is a UUID, and turns each
    one into an ``arcpublishing:<org>:<uuid>`` string.

    Args:
        url: Address of the page being scanned; not used by this method.
        webpage: HTML text of the page.

    Returns:
        A list of ``arcpublishing:<org>:<uuid>`` strings in document order;
        empty when no matching player element is present.
    """
    entries = []
    # https://arcpublishing.atlassian.net/wiki/spaces/POWA/overview
    powa_tag_re = (
        r'(<div[^>]+class="[^"]*\bpowa\b[^"]*"[^>]+data-uuid="%s"[^>]*>)'
        % ArcPublishingIE._UUID_REGEX)
    for powa_el in re.findall(powa_tag_re, webpage):
        # A falsy result from the attribute parser is treated as "no attributes".
        powa = extract_attributes(powa_el) or {}
        org = powa.get('data-org')
        uuid = powa.get('data-uuid')
        # The regex only guarantees data-uuid; skip tags without data-org so
        # no 'arcpublishing:None:<uuid>' entry is produced.
        if org and uuid:
            entries.append('arcpublishing:%s:%s' % (org, uuid))
    return entries
85 def _real_extract(self
, url
):
86 org
, uuid
= self
._match
_valid
_url
(url
).groups()
87 for orgs
, tmpl
in self
._POWA
_DEFAULTS
:
92 base_api_tmpl
= '%s-prod-cdn.video-api.arcpublishing.com/api'
95 video
= self
._download
_json
(
96 'https://%s/v1/ansvideos/findByUuid' % (base_api_tmpl
% org
),
97 uuid
, query
={'uuid': uuid
})[0]
98 title
= video
['headlines']['basic']
99 is_live
= video
.get('status') == 'live'
103 for s
in video
.get('streams', []):
105 if not s_url
or s_url
in urls
:
108 stream_type
= s
.get('stream_type')
109 if stream_type
== 'smil':
110 smil_formats
= self
._extract
_smil
_formats
(
111 s_url
, uuid
, fatal
=False)
112 for f
in smil_formats
:
113 if f
['url'].endswith('/cfx/st'):
115 if not f
['play_path'].startswith('mp4:'):
116 f
['play_path'] = 'mp4:' + f
['play_path']
117 if isinstance(f
['tbr'], float):
118 f
['vbr'] = f
['tbr'] * 1000
120 f
['format_id'] = 'rtmp-%d' % f
['vbr']
121 formats
.extend(smil_formats
)
122 elif stream_type
in ('ts', 'hls'):
123 m3u8_formats
= self
._extract
_m
3u8_formats
(
124 s_url
, uuid
, 'mp4', live
=is_live
, m3u8_id
='hls', fatal
=False)
125 if all([f
.get('acodec') == 'none' for f
in m3u8_formats
]):
127 for f
in m3u8_formats
:
128 height
= f
.get('height')
131 vbr
= self
._search
_regex
(
132 r
'[_x]%d[_-](\d+)' % height
, f
['url'], 'vbr', default
=None)
135 formats
.extend(m3u8_formats
)
137 vbr
= int_or_none(s
.get('bitrate'))
139 'format_id': '%s-%d' % (stream_type
, vbr
) if vbr
else stream_type
,
141 'width': int_or_none(s
.get('width')),
142 'height': int_or_none(s
.get('height')),
143 'filesize': int_or_none(s
.get('filesize')),
149 for subtitle
in (try_get(video
, lambda x
: x
['subtitles']['urls'], list) or []):
150 subtitle_url
= subtitle
.get('url')
152 subtitles
.setdefault('en', []).append({'url': subtitle_url
})
157 'thumbnail': try_get(video
, lambda x
: x
['promo_image']['url']),
158 'description': try_get(video
, lambda x
: x
['subheadlines']['basic']),
160 'duration': int_or_none(video
.get('duration'), 100),
161 'timestamp': parse_iso8601(video
.get('created_date')),
162 'subtitles': subtitles
,