3 from .common
import InfoExtractor
class TelegramEmbedIE(InfoExtractor):
    """Extract the video(s) attached to a Telegram post via its embed frame.

    URLs have the form ``https://t.me/<channel_id>/<id>``. A post may carry
    several videos; they are returned as a playlist unless the URL has a
    ``single`` query parameter or the playlist is declined.
    """

    IE_NAME = 'telegram:embed'
    _VALID_URL = r'https?://t\.me/(?P<channel_id>[^/]+)/(?P<id>\d+)'
    # NOTE(review): the dict/list scaffolding and the 'id', 'ext', 'channel'
    # (last two cases), 'playlist_count' and 'noplaylist' entries below were
    # not visible in the damaged source; they are inferred from what
    # _real_extract returns and from the per-case comments. The original cases
    # may assert further fields (e.g. 'duration') -- confirm against upstream.
    _TESTS = [{
        'url': 'https://t.me/europa_press/613',
        'md5': 'dd707708aea958c11a590e8068825f22',
        'info_dict': {
            'id': '613',
            'ext': 'mp4',
            'title': 'md5:6ce2d7e8d56eda16d80607b23db7b252',
            'description': 'md5:6ce2d7e8d56eda16d80607b23db7b252',
            'channel_id': 'europa_press',
            'channel': 'Europa Press ✔',
            'thumbnail': r're:^https?://.+',
            'timestamp': 1635631203,
            'upload_date': '20211030',
        },
    }, {
        # 2-video post
        'url': 'https://t.me/vorposte/29342',
        'info_dict': {
            'id': 'vorposte-29342',
            'title': 'Форпост 29342',
            'description': 'md5:9d92e22169a3e136d5d69df25f82c3dc',
        },
        'playlist_count': 2,
        'params': {
            'skip_download': True,
        },
    }, {
        # 2-video post with --no-playlist
        'url': 'https://t.me/vorposte/29343',
        'md5': '1724e96053c18e788c8464038876e245',
        'info_dict': {
            'id': '29343',
            'ext': 'mp4',
            'title': 'md5:9d92e22169a3e136d5d69df25f82c3dc',
            'description': 'md5:9d92e22169a3e136d5d69df25f82c3dc',
            'channel_id': 'vorposte',
            'channel': 'Форпост',
            'thumbnail': r're:^https?://.+',
            'timestamp': 1666384480,
            'upload_date': '20221021',
        },
        'params': {
            'noplaylist': True,
        },
    }, {
        # 2-video post with 'single' query param
        'url': 'https://t.me/vorposte/29342?single',
        'md5': 'd20b202f1e41400a9f43201428add18f',
        'info_dict': {
            'id': '29342',
            'ext': 'mp4',
            'title': 'md5:9d92e22169a3e136d5d69df25f82c3dc',
            'description': 'md5:9d92e22169a3e136d5d69df25f82c3dc',
            'channel_id': 'vorposte',
            'channel': 'Форпост',
            'thumbnail': r're:^https?://.+',
            'timestamp': 1666384480,
            'upload_date': '20221021',
        },
    }]

    def _real_extract(self, url):
        """Download the embed frame for *url* and return its video entries.

        Returns a playlist result when the post holds more than one video,
        the URL has no ``single`` query parameter and ``_yes_playlist``
        agrees; otherwise the single entry whose id equals the message id
        (``None`` if no entry matches).
        """
        channel_id, msg_id = self._match_valid_url(url).group('channel_id', 'id')
        # 'single': [] is passed alongside 'embed' exactly as in the original
        # call; presumably it clears any 'single' parameter from the request
        # so the frame lists every video of the post -- TODO confirm.
        embed = self._download_webpage(
            url, msg_id, query={'embed': '1', 'single': []}, note='Downloading embed frame')

        def clean_text(html_class, html):
            """Return the text of the first element with *html_class* on one line, or None."""
            text = clean_html(get_element_by_class(html_class, html))
            return text.replace('\n', ' ') if text else None

        description = clean_text('tgme_widget_message_text', embed)
        # Metadata shared by every video of the post.
        message = {
            'title': description or '',
            'description': description,
            'channel': clean_text('tgme_widget_message_author', embed),
            'channel_id': channel_id,
            'timestamp': unified_timestamp(self._search_regex(
                r'<time[^>]*datetime="([^"]*)"', embed, 'timestamp', fatal=False)),
        }

        videos = []
        # Each match runs from a video player anchor up to its closing </time>.
        for video in re.findall(r'<a class="tgme_widget_message_video_player(?s:.+?)</time>', embed):
            video_url = self._search_regex(
                r'<video[^>]+src="([^"]+)"', video, 'video URL', fatal=False)
            webpage_url = self._search_regex(
                r'<a class="tgme_widget_message_video_player[^>]+href="([^"]+)"',
                video, 'webpage URL', fatal=False)
            if not video_url or not webpage_url:
                # Without both URLs the entry can be neither played nor identified.
                continue
            # NOTE(review): the format list was not visible in the damaged
            # source; 'ext' is assumed to be mp4 -- confirm against upstream.
            formats = [{
                'url': video_url,
                'ext': 'mp4',
            }]
            videos.append({
                'id': url_basename(webpage_url),
                'webpage_url': update_url_query(webpage_url, {'single': True}),
                'duration': parse_duration(self._search_regex(
                    r'<time[^>]+duration[^>]*>([\d:]+)</time>', video, 'duration', fatal=False)),
                'thumbnail': self._search_regex(
                    r'tgme_widget_message_video_thumb"[^>]+background-image:url\(\'([^\']+)\'\)',
                    video, 'thumbnail', fatal=False),
                'formats': formats,
                **message,
            })

        # A playlist id is only assigned for multi-video posts requested
        # without the 'single' parameter; it stays None otherwise.
        playlist_id = None
        if len(videos) > 1 and 'single' not in parse_qs(url, keep_blank_values=True):
            playlist_id = f'{channel_id}-{msg_id}'

        if self._yes_playlist(playlist_id, msg_id):
            return self.playlist_result(
                videos, playlist_id, format_field(message, 'channel', f'%s {msg_id}'), description)
        return traverse_obj(videos, lambda _, x: x['id'] == msg_id, get_all=False)