4 from .common
import InfoExtractor
17 class GlomexBaseIE(InfoExtractor
):
18 _DEFAULT_ORIGIN_URL
= 'https://player.glomex.com/'
19 _API_URL
= 'https://integration-cloudfront-eu-west-1.mes.glomex.cloud/'
22 def _smuggle_origin_url(url
, origin_url
):
23 if origin_url
is None:
25 return smuggle_url(url
, {'origin': origin_url
})
def _unsmuggle_origin_url(cls, url, fallback_origin_url=None):
    """Unsmuggle *url* and return it together with its origin URL.

    A default payload is handed to ``unsmuggle_url`` whose 'origin' entry is
    *fallback_origin_url* or, when that is falsy, ``cls._DEFAULT_ORIGIN_URL``.

    Returns a ``(unsmuggled_url, origin_url)`` tuple, where ``origin_url`` is
    the 'origin' entry of the data returned by ``unsmuggle_url``.
    """
    origin_default = fallback_origin_url or cls._DEFAULT_ORIGIN_URL
    plain_url, smuggled = unsmuggle_url(url, default={'origin': origin_default})
    return plain_url, smuggled['origin']
33 def _get_videoid_type(self
, video_id
):
37 'rl': 'related videos playlist',
38 'cl': 'curated playlist',
40 prefix
= video_id
.split('-')[0]
41 return _VIDEOID_TYPES
.get(prefix
, 'unknown type')
43 def _download_api_data(self
, video_id
, integration
, current_url
=None):
45 'integration_id': integration
,
46 'playlist_id': video_id
,
47 'current_url': current_url
or self
._DEFAULT
_ORIGIN
_URL
,
49 video_id_type
= self
._get
_videoid
_type
(video_id
)
50 return self
._download
_json
(
52 video_id
, f
'Downloading {video_id_type} JSON',
53 f
'Unable to download {video_id_type} JSON',
56 def _download_and_extract_api_data(self
, video_id
, integration
, current_url
):
57 api_data
= self
._download
_api
_data
(video_id
, integration
, current_url
)
58 videos
= api_data
['videos']
60 raise ExtractorError(f
'no videos found for {video_id}')
61 videos
= [self
._extract
_api
_data
(video
, video_id
) for video
in videos
]
62 return videos
[0] if len(videos
) == 1 else self
.playlist_result(videos
, video_id
)
64 def _extract_api_data(self
, video
, video_id
):
65 if video
.get('error_code') == 'contentGeoblocked':
66 self
.raise_geo_restricted(countries
=video
['geo_locations'])
68 formats
, subs
= [], {}
69 for format_id
, format_url
in video
['source'].items():
70 ext
= determine_ext(format_url
)
72 formats_
, subs_
= self
._extract
_m
3u8_formats
_and
_subtitles
(
73 format_url
, video_id
, 'mp4', m3u8_id
=format_id
,
75 formats
.extend(formats_
)
76 self
._merge
_subtitles
(subs_
, target
=subs
)
80 'format_id': format_id
,
82 if video
.get('language'):
84 fmt
['language'] = video
['language']
86 images
= (video
.get('images') or []) + [video
.get('image') or {}]
88 'id': image
.get('id'),
89 'url': f
'{image["url"]}/profile:player-960x540',
92 } for image
in images
if image
.get('url')]
93 self
._remove
_duplicate
_formats
(thumbnails
)
96 'id': video
.get('clip_id') or video_id
,
97 'title': video
.get('title'),
98 'description': video
.get('description'),
99 'thumbnails': thumbnails
,
100 'duration': int_or_none(video
.get('clip_duration')),
101 'timestamp': video
.get('created_at'),
107 class GlomexIE(GlomexBaseIE
):
109 IE_DESC
= 'Glomex videos'
110 _VALID_URL
= r
'https?://video\.glomex\.com/[^/]+/(?P<id>v-[^-]+)'
111 _INTEGRATION_ID
= '19syy24xjn1oqlpc'
114 'url': 'https://video.glomex.com/sport/v-cb24uwg77hgh-nach-2-0-sieg-guardiola-mit-mancity-vor-naechstem-titel',
115 'md5': 'cec33a943c4240c9cb33abea8c26242e',
117 'id': 'v-cb24uwg77hgh',
119 'title': 'md5:38a90cedcfadd72982c81acf13556e0c',
120 'description': 'md5:1ea6b6caff1443fcbbba159e432eedb8',
122 'timestamp': 1619895017,
123 'upload_date': '20210501',
def _real_extract(self, url):
    """Delegate extraction to GlomexEmbedIE through a generated player URL.

    The id matched from *url* and this class's ``_INTEGRATION_ID`` are passed
    to ``GlomexEmbedIE.build_player_url``, with *url* itself supplied as the
    origin URL. The resulting player URL is returned as a URL result tagged
    with GlomexEmbedIE's extractor key and the video id.
    """
    video_id = self._match_id(url)
    player_url = GlomexEmbedIE.build_player_url(
        video_id, self._INTEGRATION_ID, url)
    return self.url_result(player_url, GlomexEmbedIE.ie_key(), video_id)
134 class GlomexEmbedIE(GlomexBaseIE
):
135 IE_NAME
= 'glomex:embed'
136 IE_DESC
= 'Glomex embedded videos'
137 _BASE_PLAYER_URL
= '//player.glomex.com/integration/1/iframe-player.html'
138 _BASE_PLAYER_URL_RE
= re
.escape(_BASE_PLAYER_URL
).replace('/1/', r
'/[^/]/')
139 _VALID_URL
= rf
'https?:{_BASE_PLAYER_URL_RE}\?([^#]+&)?playlistId=(?P<id>[^#&]+)'
142 'url': 'https://player.glomex.com/integration/1/iframe-player.html?integrationId=4059a013k56vb2yd&playlistId=v-cfa6lye0dkdd-sf',
143 'md5': '68f259b98cc01918ac34180142fce287',
145 'id': 'v-cfa6lye0dkdd-sf',
147 'timestamp': 1635337199,
149 'upload_date': '20211027',
150 'description': 'md5:e741185fc309310ff5d0c789b437be66',
151 'title': 'md5:35647293513a6c92363817a0fb0a7961',
154 'url': 'https://player.glomex.com/integration/1/iframe-player.html?origin=fullpage&integrationId=19syy24xjn1oqlpc&playlistId=rl-vcb49w1fb592p&playlistIndex=0',
156 'id': 'rl-vcb49w1fb592p',
158 'playlist_count': 100,
160 'url': 'https://player.glomex.com/integration/1/iframe-player.html?playlistId=cl-bgqaata6aw8x&integrationId=19syy24xjn1oqlpc',
162 'id': 'cl-bgqaata6aw8x',
164 'playlist_mincount': 2,
168 def build_player_url(cls
, video_id
, integration
, origin_url
=None):
169 query_string
= urllib
.parse
.urlencode({
170 'playlistId': video_id
,
171 'integrationId': integration
,
173 return cls
._smuggle
_origin
_url
(f
'https:{cls._BASE_PLAYER_URL}?{query_string}', origin_url
)
176 def _extract_embed_urls(cls
, url
, webpage
):
177 # https://docs.glomex.com/publisher/video-player-integration/javascript-api/
181 <iframe[^>]+?src=(?P<q>{quot_re})(?P<url>
182 (?:https?:)?{cls._BASE_PLAYER_URL_RE}\?(?:(?!(?P=q)).)+
184 for mobj in re.finditer(regex, webpage):
185 embed_url = unescapeHTML(mobj.group('url
'))
186 if cls.suitable(embed_url):
187 yield cls._smuggle_origin_url(embed_url, url)
190 <glomex-player [^>]+?>|
191 <div[^>]* data-glomex-player=(?P<q>{quot_re})true(?P=q)[^>]*>'''
192 for mobj in re.finditer(regex, webpage):
193 attrs = extract_attributes(mobj.group(0))
194 if attrs.get('data
-integration
-id') and attrs.get('data
-playlist
-id'):
195 yield cls.build_player_url(attrs['data
-playlist
-id'], attrs['data
-integration
-id'], url)
197 # naive parsing of inline scripts for hard-coded integration parameters
199 (?P<is_js>dataset\.)?%s\s*(?(is_js)=|:)\s*
200 (?P<q>{quot_re})(?P<id>(?:(?!(?P=q)).)+)(?P=q)\s'''
201 for mobj in re.finditer(r'(?x
)<script
[^
<]*>.+?
</script
>', webpage):
202 script = mobj.group(0)
203 integration_id = re.search(regex % 'integrationId
', script)
204 if not integration_id:
206 playlist_id = re.search(regex % 'playlistId
', script)
208 yield cls.build_player_url(playlist_id, integration_id, url)
210 def _real_extract(self, url):
211 url, origin_url = self._unsmuggle_origin_url(url)
212 playlist_id = self._match_id(url)
213 integration = parse_qs(url).get('integrationId
', [None])[0]
215 raise ExtractorError('No integrationId
in URL
', expected=True)
216 return self._download_and_extract_api_data(playlist_id, integration, origin_url)