1 from .common
import InfoExtractor
15 class EuropaIE(InfoExtractor
):
17 _VALID_URL
= r
'https?://ec\.europa\.eu/avservices/(?:video/player|audio/audioDetails)\.cfm\?.*?\bref=(?P<id>[A-Za-z0-9-]+)'
19 'url': 'http://ec.europa.eu/avservices/video/player.cfm?ref=I107758',
20 'md5': '574f080699ddd1e19a675b0ddf010371',
24 'title': 'TRADE - Wikileaks on TTIP',
25 'description': 'NEW LIVE EC Midday press briefing of 11/08/2015',
26 'thumbnail': r
're:^https?://.*\.jpg$',
27 'upload_date': '20150811',
30 'formats': 'mincount:3',
33 'url': 'http://ec.europa.eu/avservices/video/player.cfm?sitelang=en&ref=I107786',
34 'only_matching': True,
36 'url': 'http://ec.europa.eu/avservices/audio/audioDetails.cfm?ref=I-109295&sitelang=en',
37 'only_matching': True,
40 def _real_extract(self
, url
):
41 video_id
= self
._match
_id
(url
)
43 playlist
= self
._download
_xml
(
44 'http://ec.europa.eu/avservices/video/player/playlist.cfm?ID=%s' % video_id
, video_id
)
46 def get_item(type_
, preference
):
48 for item
in playlist
.findall('./info/%s/item' % type_
):
49 lang
, label
= xpath_text(item
, 'lg', default
=None), xpath_text(item
, 'label', default
=None)
51 items
[lang
] = label
.strip()
57 preferred_lang
= query
.get('sitelang', ('en', ))[0]
59 preferred_langs
= orderedSet((preferred_lang
, 'en', 'int'))
61 title
= get_item('title', preferred_langs
) or video_id
62 description
= get_item('description', preferred_langs
)
63 thumbnail
= xpath_text(playlist
, './info/thumburl', 'thumbnail')
64 upload_date
= unified_strdate(xpath_text(playlist
, './info/date', 'upload date'))
65 duration
= parse_duration(xpath_text(playlist
, './info/duration', 'duration'))
66 view_count
= int_or_none(xpath_text(playlist
, './info/views', 'views'))
68 language_preference
= qualities(preferred_langs
[::-1])
71 for file_
in playlist
.findall('./files/file'):
72 video_url
= xpath_text(file_
, './url')
75 lang
= xpath_text(file_
, './lg')
79 'format_note': xpath_text(file_
, './lglabel'),
80 'language_preference': language_preference(lang
)
86 'description': description
,
87 'thumbnail': thumbnail
,
88 'upload_date': upload_date
,
90 'view_count': view_count
,
95 class EuroParlWebstreamIE(InfoExtractor
):
97 https?://multimedia\.europarl\.europa\.eu/[^/#?]+/
98 (?:(?!video)[^/#?]+/[\w-]+_)(?P<id>[\w-]+)
101 'url': 'https://multimedia.europarl.europa.eu/pl/webstreaming/plenary-session_20220914-0900-PLENARY',
103 'id': '62388b15-d85b-4add-99aa-ba12ccf64f0d',
105 'title': 'Plenary session',
106 'release_timestamp': 1663139069,
107 'release_date': '20220914',
110 'skip_download': True,
114 'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/euroscola_20221115-1000-SPECIAL-EUROSCOLA',
117 'id': '510eda7f-ba72-161b-7ee7-0e836cd2e715',
118 'release_timestamp': 1668502800,
119 'title': 'Euroscola 2022-11-15 19:21',
120 'release_date': '20221115',
121 'live_status': 'is_live',
123 'skip': 'not live anymore'
125 'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/committee-on-culture-and-education_20230301-1130-COMMITTEE-CULT',
127 'id': '7355662c-8eac-445e-4bb9-08db14b0ddd7',
129 'release_date': '20230301',
130 'title': 'Committee on Culture and Education',
131 'release_timestamp': 1677666641,
135 'url': 'https://multimedia.europarl.europa.eu/en/webstreaming/committee-on-environment-public-health-and-food-safety_20230524-0900-COMMITTEE-ENVI',
137 'id': 'e4255f56-10aa-4b3c-6530-08db56d5b0d9',
139 'release_date': '20230524',
140 'title': r
're:Committee on Environment, Public Health and Food Safety \d{4}-\d{2}-\d{2}\s\d{2}:\d{2}',
141 'release_timestamp': 1684911541,
142 'live_status': 'is_live',
144 'skip': 'Not live anymore'
147 def _real_extract(self
, url
):
148 display_id
= self
._match
_id
(url
)
149 webpage
= self
._download
_webpage
(url
, display_id
)
151 webpage_nextjs
= self
._search
_nextjs
_data
(webpage
, display_id
)['props']['pageProps']
153 json_info
= self
._download
_json
(
154 'https://acs-api.europarl.connectedviews.eu/api/FullMeeting', display_id
,
157 'tenantId': 'bae646ca-1fc8-4363-80ba-2c04f06b4968',
158 'externalReference': display_id
161 formats
, subtitles
= [], {}
162 for hls_url
in traverse_obj(json_info
, ((('meetingVideo'), ('meetingVideos', ...)), 'hlsUrl')):
163 fmt
, subs
= self
._extract
_m
3u8_formats
_and
_subtitles
(hls_url
, display_id
)
165 self
._merge
_subtitles
(subs
, target
=subtitles
)
168 'id': json_info
['id'],
169 'title': traverse_obj(webpage_nextjs
, (('mediaItem', 'title'), ('title', )), get_all
=False),
171 'subtitles': subtitles
,
172 'release_timestamp': parse_iso8601(json_info
.get('startDateTime')),
173 'is_live': traverse_obj(webpage_nextjs
, ('mediaItem', 'mediaSubType')) == 'Live'