3 from .common
import InfoExtractor
4 from .redge
import RedCDNLivxIE
12 from ..utils
.traversal
import traverse_obj
def is_dst(date):
    """Tell whether *date* falls inside the summer-time window of its year.

    The window opens at 02:00 on the last Sunday of March and closes at
    03:00 on the last Sunday of October; both bounds are inclusive.

    NOTE(review): these are the EU switch-over rules expressed in local
    wall-clock time, presumably intended for Polish local time - confirm.
    *date* is compared against naive datetimes built here.
    """
    def last_sunday(month):
        # Only called for March and October, which both have 31 days.
        # isoweekday() is 7 on Sunday, so the modulo yields the number of
        # days elapsed since the most recent Sunday (0 if it is a Sunday).
        month_end = dt.datetime(date.year, month, 31)
        return month_end - dt.timedelta(days=month_end.isoweekday() % 7)

    window_start = last_sunday(3).replace(hour=2)
    window_end = last_sunday(10).replace(hour=3)
    return window_start <= date <= window_end
def rfc3339_to_atende(date):
    """Convert an ISO-formatted date string into an "atende" timestamp.

    The result is an integer number of milliseconds counted from
    2001-01-01T00:00:00Z (978307200 is that instant as a Unix timestamp).
    One extra hour is added when ``is_dst`` reports the parsed date as
    falling within summer time.

    NOTE(review): if the string carries no UTC offset the parsed datetime
    is naive, and ``timestamp()`` then interprets it in the machine's local
    time zone - confirm this is intended.
    """
    parsed = dt.datetime.fromisoformat(date)
    if is_dst(parsed):
        parsed = parsed + dt.timedelta(hours=1)
    seconds_since_2001 = parsed.timestamp() - 978307200
    return int(seconds_since_2001 * 1000)
29 class SejmIE(InfoExtractor
):
31 r
'https?://(?:www\.)?sejm\.gov\.pl/[Ss]ejm(?P<term>\d+)\.nsf/transmisje(?:_arch)?\.xsp(?:\?[^#]*)?#(?P<id>[\dA-F]+)',
32 r
'https?://(?:www\.)?sejm\.gov\.pl/[Ss]ejm(?P<term>\d+)\.nsf/transmisje(?:_arch)?\.xsp\?(?:[^#]+&)?unid=(?P<id>[\dA-F]+)',
33 r
'https?://sejm-embed\.redcdn\.pl/[Ss]ejm(?P<term>\d+)\.nsf/VideoFrame\.xsp/(?P<id>[\dA-F]+)',
38 # multiple cameras, Polish SL interpreter
39 'url': 'https://www.sejm.gov.pl/Sejm10.nsf/transmisje_arch.xsp#6181EF1AD9CEEBB5C1258A6D006452B5',
41 'id': '6181EF1AD9CEEBB5C1258A6D006452B5',
42 'title': '1. posiedzenie Sejmu X kadencji',
44 'live_status': 'was_live',
45 'location': 'Sala Posiedzeń',
49 'id': 'ENC01-722340000000-722360145000',
52 'title': '1. posiedzenie Sejmu X kadencji - ENC01',
53 'live_status': 'was_live',
57 'id': 'ENC30-722340000000-722360145000',
60 'title': '1. posiedzenie Sejmu X kadencji - ENC30',
61 'live_status': 'was_live',
65 'id': 'ENC31-722340000000-722360145000',
68 'title': '1. posiedzenie Sejmu X kadencji - ENC31',
69 'live_status': 'was_live',
73 'id': 'ENC32-722340000000-722360145000',
76 'title': '1. posiedzenie Sejmu X kadencji - ENC32',
77 'live_status': 'was_live',
80 # sign lang interpreter
82 'id': 'Migacz-ENC01-1-722340000000-722360145000',
85 'title': '1. posiedzenie Sejmu X kadencji - Migacz-ENC01',
86 'live_status': 'was_live',
90 'url': 'https://www.sejm.gov.pl/Sejm8.nsf/transmisje.xsp?unid=9377A9D65518E9A5C125808E002E9FF2',
92 'id': '9377A9D65518E9A5C125808E002E9FF2',
93 'title': 'Debata "Lepsza Polska: obywatelska"',
94 'description': 'KP .Nowoczesna',
96 'live_status': 'was_live',
97 'location': 'sala kolumnowa im. Kazimierza Pużaka (bud. C-D)',
101 'id': 'ENC08-1-503831270000-503840040000',
104 'title': 'Debata "Lepsza Polska: obywatelska" - ENC08',
105 'live_status': 'was_live',
109 # 7th term is very special, since it does not use redcdn livx
110 'url': 'https://www.sejm.gov.pl/sejm7.nsf/transmisje_arch.xsp?rok=2015&month=11#A6E6D475ECCC6FE5C1257EF90034817F',
112 'id': 'A6E6D475ECCC6FE5C1257EF90034817F',
113 'title': 'Konferencja prasowa - Stanowisko SLD ws. składu nowego rządu',
114 'description': 'SLD - Biuro Prasowe Klubu',
116 'location': 'sala 101/bud. C',
117 'live_status': 'was_live',
121 'id': 'A6E6D475ECCC6FE5C1257EF90034817F',
123 'title': 'Konferencja prasowa - Stanowisko SLD ws. składu nowego rządu',
128 'url': 'https://sejm-embed.redcdn.pl/Sejm10.nsf/VideoFrame.xsp/FED58EABB97FBD53C1258A7400386492',
129 'only_matching': True,
132 def _real_extract(self
, url
):
133 term
, video_id
= self
._match
_valid
_url
(url
).group('term', 'id')
134 frame
= self
._download
_webpage
(
135 f
'https://sejm-embed.redcdn.pl/Sejm{term}.nsf/VideoFrame.xsp/{video_id}',
137 # although the path says "transmisje_arch", it works for live streams too!
138 data
= self
._download
_json
(
139 f
'https://www.sejm.gov.pl/Sejm{term}.nsf/transmisje_arch.xsp/json/{video_id}',
141 params
= data
['params']
143 title
= strip_or_none(data
.get('title'))
145 if data
.get('status') == 'VIDEO_ENDED':
146 live_status
= 'was_live'
147 elif data
.get('status') == 'VIDEO_PLAYING':
148 live_status
= 'is_live'
151 self
.report_warning(f
'unknown status: {data.get("status")}')
153 start_time
= rfc3339_to_atende(params
['start'])
154 # current streams have a stop time of *expected* end of session, but actual times
155 # can change during the transmission. setting a stop_time would artificially
156 # end the stream at that time, while the session actually keeps going.
157 if live_status
== 'was_live':
158 stop_time
= rfc3339_to_atende(params
['stop'])
159 duration
= (stop_time
- start_time
) // 1000
161 stop_time
, duration
= None, None
165 def add_entry(file, legacy_file
=False):
168 file = self
._proto
_relative
_url
(file)
170 file = update_url_query(file, {'startTime': start_time
})
171 if stop_time
is not None:
172 file = update_url_query(file, {'stopTime': stop_time
})
173 stream_id
= self
._search
_regex
(r
'/o2/sejm/([^/]+)/[^./]+\.livx', file, 'stream id')
176 'duration': duration
,
187 '_type': 'url_transparent',
188 'ie_key': RedCDNLivxIE
.ie_key(),
190 'title': join_nonempty(title
, stream_id
, delim
=' - '),
193 cameras
= self
._search
_json
(
194 r
'var\s+cameras\s*=', frame
, 'camera list', video_id
,
195 contains_pattern
=r
'\[(?s:.+)\]', transform_source
=js_to_json
,
197 for camera_file
in traverse_obj(cameras
, (..., 'file', {dict}
)):
198 if camera_file
.get('flv'):
199 add_entry(camera_file
['flv'])
200 elif camera_file
.get('mp4'):
201 # this is only a thing in 7th term. no streams before, and starting 8th it's redcdn livx
202 add_entry(camera_file
['mp4'], legacy_file
=True)
204 self
.report_warning('Unknown camera stream type found')
206 if params
.get('mig'):
207 add_entry(self
._search
_regex
(r
"var sliUrl\s*=\s*'([^']+)'", frame
, 'sign language interpreter url', fatal
=False))
214 'description': clean_html(data
.get('desc')) or None,
215 'duration': duration
,
216 'live_status': live_status
,
217 'location': strip_or_none(data
.get('location')),