3 from .common
import InfoExtractor
4 from .senategov
import SenateISVPIE
5 from .ustream
import UstreamIE
6 from ..compat
import compat_HTMLParseError
12 get_element_by_attribute
,
26 class CSpanIE(InfoExtractor
):
27 _VALID_URL
= r
'https?://(?:www\.)?c-span\.org/video/\?(?P<id>[0-9a-f]+)'
30 'url': 'http://www.c-span.org/video/?313572-1/HolderonV',
31 'md5': '94b29a4f131ff03d23471dd6f60b6a1d',
34 'title': 'Attorney General Eric Holder on Voting Rights Act Decision',
36 'playlist_mincount': 2,
37 'skip': 'Regularly fails on travis, for unknown reasons',
39 'url': 'http://www.c-span.org/video/?c4486943/cspan-international-health-care-models',
44 'title': 'CSPAN - International Health Care Models',
45 'description': 'md5:7a985a2d595dba00af3d9c9f0783c967',
48 'url': 'http://www.c-span.org/video/?318608-1/gm-ignition-switch-recall',
51 'title': 'General Motors Ignition Switch Recall',
53 'playlist_mincount': 6,
55 # Video from senate.gov
56 'url': 'http://www.c-span.org/video/?104517-1/immigration-reforms-needed-protect-skilled-american-workers',
58 'id': 'judiciary031715',
60 'title': 'Immigration Reforms Needed to Protect Skilled American Workers',
63 'skip_download': True, # m3u8 downloads
66 # Ustream embedded video
67 'url': 'https://www.c-span.org/video/?114917-1/armed-services',
71 'title': 'USHR07 Armed Services Committee',
72 'description': 'hsas00-2118-20150204-1000et-07\n\n\nUSHR07 Armed Services Committee',
73 'timestamp': 1423060374,
74 'upload_date': '20150204',
75 'uploader': 'HouseCommittee',
76 'uploader_id': '12987475',
80 'url': 'https://www.c-span.org/video/?437336-1/judiciary-antitrust-competition-policy-consumer-rights',
81 'only_matching': True,
83 BRIGHTCOVE_URL_TEMPLATE
= 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
85 def _real_extract(self
, url
):
86 video_id
= self
._match
_id
(url
)
88 webpage
= self
._download
_webpage
(url
, video_id
)
90 ustream_url
= UstreamIE
._extract
_url
(webpage
)
92 return self
.url_result(ustream_url
, UstreamIE
.ie_key())
95 bc
= self
._search
_regex
(
96 r
"(<[^>]+id='brightcove-player-embed'[^>]+>)",
97 webpage
, 'brightcove embed', default
=None)
99 bc_attr
= extract_attributes(bc
)
100 bc_url
= self
.BRIGHTCOVE_URL_TEMPLATE
% (
101 bc_attr
.get('data-bcaccountid', '3162030207001'),
102 bc_attr
.get('data-noprebcplayerid', 'SyGGpuJy3g'),
103 bc_attr
.get('data-newbcplayerid', 'default'),
104 bc_attr
['data-bcid'])
105 return self
.url_result(smuggle_url(bc_url
, {'source_url': url
}))
107 def add_referer(formats
):
109 f
.setdefault('http_headers', {})['Referer'] = url
111 # As of 01.12.2020 this path looks to cover all cases making the rest
112 # of the code unnecessary
113 jwsetup
= self
._parse
_json
(
115 r
'(?s)jwsetup\s*=\s*({.+?})\s*;', webpage
, 'jwsetup',
117 video_id
, transform_source
=js_to_json
, fatal
=False)
119 info
= self
._parse
_jwplayer
_data
(
120 jwsetup
, video_id
, require_title
=False, m3u8_id
='hls',
122 add_referer(info
['formats'])
123 for subtitles
in info
['subtitles'].values():
124 for subtitle
in subtitles
:
125 ext
= determine_ext(subtitle
['url'])
128 subtitle
['ext'] = ext
129 ld_info
= self
._search
_json
_ld
(webpage
, video_id
, default
={})
131 title
= get_element_by_class('video-page-title', webpage
)
132 except compat_HTMLParseError
:
135 title
= self
._og
_search
_title
(webpage
)
136 description
= get_element_by_attribute('itemprop', 'description', webpage
) or \
137 self
._html
_search
_meta
(['og:description', 'description'], webpage
)
138 return merge_dicts(info
, ld_info
, {
140 'thumbnail': get_element_by_attribute('itemprop', 'thumbnailUrl', webpage
),
141 'description': description
,
142 'timestamp': parse_iso8601(get_element_by_attribute('itemprop', 'uploadDate', webpage
)),
143 'location': get_element_by_attribute('itemprop', 'contentLocation', webpage
),
144 'duration': int_or_none(self
._search
_regex
(
145 r
'jwsetup\.seclength\s*=\s*(\d+);',
146 webpage
, 'duration', fatal
=False)),
147 'view_count': str_to_int(self
._search
_regex
(
148 r
"<span[^>]+class='views'[^>]*>([\d,]+)\s+Views</span>",
149 webpage
, 'views', fatal
=False)),
153 # We first look for clipid, because clipprog always appears before
154 patterns
= [rf
'id=\'clip({t})\'\s*value=\'([0-9]+)\'' for t
in ('id', 'prog')]
155 results
= list(filter(None, (re
.search(p
, webpage
) for p
in patterns
)))
158 video_type
, video_id
= matches
.groups()
159 video_type
= 'clip' if video_type
== 'id' else 'program'
161 m
= re
.search(r
'data-(?P<type>clip|prog)id=["\'](?P
<id>\d
+)', webpage)
163 video_id = m.group('id')
164 video_type = 'program
' if m.group('type') == 'prog
' else 'clip
'
166 senate_isvp_url = SenateISVPIE._extract_url(webpage)
168 title = self._og_search_title(webpage)
169 surl = smuggle_url(senate_isvp_url, {'force_title
': title})
170 return self.url_result(surl, 'SenateISVP
', video_id, title)
171 video_id = self._search_regex(
172 r'jwsetup\
.clipprog\s
*=\s
*(\d
+);',
173 webpage, 'jwsetup program
id', default=None)
175 video_type = 'program
'
176 if video_type is None or video_id is None:
177 error_message = get_element_by_class('VLplayer
-error
-message
', webpage)
179 raise ExtractorError(error_message)
180 raise ExtractorError('unable to find video
id and type')
182 def get_text_attr(d, attr):
183 return d.get(attr, {}).get('#text')
185 data
= self
._download
_json
(
186 f
'http://www.c-span.org/assets/player/ajax-player.php?os=android&html5={video_type}&id={video_id}',
188 if data
['@status'] != 'Success':
189 raise ExtractorError('{} said: {}'.format(self
.IE_NAME
, get_text_attr(data
, 'error')), expected
=True)
191 doc
= self
._download
_xml
(
192 f
'http://www.c-span.org/common/services/flashXml.php?{video_type}id={video_id}',
195 description
= self
._html
_search
_meta
('description', webpage
)
197 title
= find_xpath_attr(doc
, './/string', 'name', 'title').text
198 thumbnail
= find_xpath_attr(doc
, './/string', 'name', 'poster').text
200 files
= data
['files']
201 capfile
= get_text_attr(data
, 'capfile')
204 for partnum
, f
in enumerate(files
):
206 for quality
in f
.get('qualities', []):
208 'format_id': '{}-{}p'.format(get_text_attr(quality
, 'bitrate'), get_text_attr(quality
, 'height')),
209 'url': unescapeHTML(get_text_attr(quality
, 'file')),
210 'height': int_or_none(get_text_attr(quality
, 'height')),
211 'tbr': int_or_none(get_text_attr(quality
, 'bitrate')),
214 path
= unescapeHTML(get_text_attr(f
, 'path'))
217 formats
= self
._extract
_m
3u8_formats
(
218 path
, video_id
, 'mp4', entry_protocol
='m3u8_native',
219 m3u8_id
='hls') if determine_ext(path
) == 'm3u8' else [{'url': path
}]
222 'id': f
'{video_id}_{partnum + 1}',
224 title
if len(files
) == 1 else
225 f
'{title} part {partnum + 1}'),
227 'description': description
,
228 'thumbnail': thumbnail
,
229 'duration': int_or_none(get_text_attr(f
, 'length')),
233 'ext': determine_ext(capfile
, 'dfxp'),
235 } if capfile
else None,
238 if len(entries
) == 1:
239 entry
= dict(entries
[0])
240 entry
['id'] = 'c' + video_id
if video_type
== 'clip' else video_id
247 'id': 'c' + video_id
if video_type
== 'clip' else video_id
,
251 class CSpanCongressIE(InfoExtractor
):
252 _VALID_URL
= r
'https?://(?:www\.)?c-span\.org/congress/'
254 'url': 'https://www.c-span.org/congress/?chamber=house&date=2017-12-13&t=1513208380',
256 'id': 'house_2017-12-13',
257 'title': 'Congressional Chronicle - Members of Congress, Hearings and More',
258 'description': 'md5:54c264b7a8f219937987610243305a84',
259 'thumbnail': r
're:https://ximage.c-spanvideo.org/.+',
264 def _real_extract(self
, url
):
265 query
= parse_qs(url
)
266 video_date
= query
.get('date', [None])[0]
267 video_id
= join_nonempty(query
.get('chamber', ['senate'])[0], video_date
, delim
='_')
268 webpage
= self
._download
_webpage
(url
, video_id
)
270 jwp_date
= re
.search(r
'jwsetup.clipprogdate = \'(?P
<date
>\d{4}
-\d{2}
-\d{2}
)\';', webpage)
272 video_id = f'{video_id}_
{jwp_date
.group("date")}'
273 jwplayer_data = self._parse_json(
274 self._search_regex(r'jwsetup\s
*=\s
*({(?
:.|
\n)[^
;]+});', webpage, 'player config
'),
275 video_id, transform_source=js_to_json)
277 title = self._generic_title('', webpage)
278 description = (self._og_search_description(webpage, default=None)
279 or self._html_search_meta('description
', webpage, 'description
', default=None))
282 **self._parse_jwplayer_data(jwplayer_data, video_id, False),
283 'title
': re.sub(r'\s
+', ' ', title.split('|
')[0]).strip(),
284 'description
': description,
285 'http_headers
': {'Referer
': 'https
://www
.c
-span
.org
/'},