Merge branch '3.0' of https://github.com/calzoneman/sync into 3.0
[KisSync.git] / src / google2vtt.js
blob293b64ac130c6176c49f5dfdbbde66a1afe5adbe
1 var cheerio = require('cheerio');
2 var https = require('https');
3 var fs = require('fs');
4 var path = require('path');
5 var querystring = require('querystring');
6 var crypto = require('crypto');
8 const LOGGER = require('@calzoneman/jsli')('google2vtt');
/**
 * Compute the MD5 digest of `input` and return it as base64 with the
 * characters '/', '+' and '=' substituted by ' ', '#' and '-' respectively.
 *
 * @param {string|Buffer} input - data to hash
 * @returns {string} the substituted base64 digest
 */
function md5(input) {
    var substitutions = { '/': ' ', '+': '#', '=': '-' };
    var digest = crypto.createHash('md5').update(input).digest('base64');

    // Single pass over the digest; each special character maps to its stand-in.
    return digest.replace(/[\/+=]/g, function (ch) {
        return substitutions[ch];
    });
}
// Borrowed Array#slice, used to turn array-like objects into real arrays.
const slice = Array.prototype.slice;

// Directory (one level above this module) where converted .vtt files are stored.
const subtitleDir = path.resolve(__dirname, '..', 'google-drive-subtitles');

// Keys are absolute subtitle file paths that are currently being worked on.
const subtitleLock = {};

// Durations in milliseconds.
const ONE_HOUR = 1000 * 60 * 60;
const ONE_DAY = ONE_HOUR * 24;
/**
 * Convert `n` to a string and prefix a single '0' when the result is
 * shorter than two characters.
 *
 * @param {number|string} n - value to format
 * @returns {string} the string form of `n`, left-padded with one zero if short
 */
function padZeros(n) {
    var digits = n.toString();
    return digits.length < 2 ? '0' + digits : digits;
}
/**
 * Format a time offset in seconds as a WebVTT-style timestamp:
 * `mm:ss.ttt`, or `hh:mm:ss.ttt` when the hours component is non-zero.
 *
 * The value is rounded to whole milliseconds BEFORE being split into
 * components, so a rounding carry propagates into seconds/minutes/hours
 * (e.g. 59.9996 becomes "01:00.000" rather than "00:59.000").
 *
 * @param {number} time - offset in seconds (may be fractional)
 * @returns {string} formatted timestamp
 */
function formatTime(time) {
    var totalMs = Math.round(time * 1000);
    var ms = totalMs % 1000;
    var totalSeconds = (totalMs - ms) / 1000;

    var hours = Math.floor(totalSeconds / 3600);
    var minutes = Math.floor((totalSeconds % 3600) / 60);
    var seconds = totalSeconds % 60;

    var list = [minutes, seconds];
    if (hours) {
        // Hours are only emitted when non-zero.
        list.unshift(hours);
    }

    // Milliseconds are always rendered as exactly three digits.
    return list.map(padZeros).join(':') + '.' + ('00' + ms).slice(-3);
}
// The entities fixText() decodes, keyed by the text between '&' and ';'.
var FIXTEXT_ENTITIES = {
    'amp': '&',
    'lt': '<',
    'gt': '>',
    'quot': '"',
    '#39': "'"
};

/**
 * Decode the entities &amp;, &lt;, &gt;, &quot; and &#39; in `text`, then
 * re-escape any "-->" sequence as "--&gt;".
 *
 * Decoding happens in a single pass so that each entity is unescaped exactly
 * once: "&amp;lt;" yields "&lt;", not "<". Decoding "&amp;" in a separate
 * earlier step would double-unescape such input.
 *
 * @param {string} text - cue text; null/undefined is treated as empty
 * @returns {string} decoded text with "-->" escaped
 */
function fixText(text) {
    if (text === null || text === undefined) {
        return '';
    }

    var decoded = String(text).replace(/&(amp|lt|gt|quot|#39);/g, function (match, name) {
        return FIXTEXT_ENTITIES[name];
    });

    return decoded.replace(/-->/g, '--&gt;');
}
55 exports.convert = function convertSubtitles(subtitles) {
56 var $ = cheerio.load(subtitles, { xmlMode: true });
57 var lines = slice.call($('transcript text').map(function (index, elem) {
58 var start = parseFloat(elem.attribs.start);
59 var end = start + parseFloat(elem.attribs.dur);
60 var text;
61 if (elem.children.length) {
62 text = elem.children[0].data;
63 } else {
64 text = '';
67 var line = formatTime(start) + ' --> ' + formatTime(end);
68 line += '\n' + fixText(text) + '\n';
69 return line;
70 }));
72 return 'WEBVTT\n\n' + lines.join('\n');
75 exports.attach = function setupRoutes(app) {
76 app.get('/gdvtt/:id/:lang/(:name)?.vtt', handleGetSubtitles);
// Characters that must never appear in a value used to build a cache filename.
var UNSAFE_FILENAME_CHARS = /[\/\\\0]/;

/**
 * Request handler for the subtitle route.
 *
 * Reads `id`, `lang` and optional `name` from the route parameters and `vid`
 * from the query string. Responds 400 when a required value is missing or
 * when `id`/`lang` contain a path separator or NUL byte. Those two values
 * become part of the cache filename that fetchSubtitles() writes to, so they
 * must not be able to point outside the subtitle directory. `name` is only
 * used through its hash.
 *
 * If the cached .vtt file exists it is sent directly; otherwise it is fetched
 * and converted first. Responds 500 if fetching fails. The per-file lock is
 * released once the file has been served or the fetch has completed.
 *
 * @param {object} req - request with `params` and `query`
 * @param {object} res - response providing `sendStatus` and `sendFile`
 */
function handleGetSubtitles(req, res) {
    var id = req.params.id;
    var lang = req.params.lang;
    var name = req.params.name || '';
    var vid = req.query.vid;
    if (typeof vid !== 'string' || typeof id !== 'string' || typeof lang !== 'string') {
        return res.sendStatus(400);
    }

    if (UNSAFE_FILENAME_CHARS.test(id) || UNSAFE_FILENAME_CHARS.test(lang)) {
        return res.sendStatus(400);
    }

    var file = [id, lang, md5(name)].join('_') + '.vtt';
    var fileAbsolute = path.join(subtitleDir, file);

    takeSubtitleLock(fileAbsolute, function () {
        // fs.access replaces the deprecated fs.exists; an error means the
        // cached file is not there yet.
        fs.access(fileAbsolute, function (accessErr) {
            if (!accessErr) {
                res.sendFile(file, { root: subtitleDir });
                delete subtitleLock[fileAbsolute];
                return;
            }

            fetchSubtitles(id, lang, name, vid, fileAbsolute, function (err) {
                delete subtitleLock[fileAbsolute];
                if (err) {
                    LOGGER.error(err.stack);
                    return res.sendStatus(500);
                }

                res.sendFile(file, { root: subtitleDir });
            });
        });
    });
}
/**
 * Download a subtitle track from https://drive.google.com/timedtext, convert
 * it with exports.convert and write the result to `file`.
 *
 * @param {string} id - sent as both the `id` and `v` query parameters
 * @param {string} lang - sent as the `lang` query parameter
 * @param {string} name - sent as the `name` query parameter
 * @param {string} vid - sent as the `vid` query parameter
 * @param {string} file - path the converted subtitles are written to
 * @param {function(Error=)} cb - invoked exactly once; receives an Error on a
 *     non-200 response, a request/response stream error, a conversion
 *     failure, or a write failure
 */
function fetchSubtitles(id, lang, name, vid, file, cb) {
    var query = {
        id: id,
        v: id,
        vid: vid,
        lang: lang,
        name: name,
        type: 'track',
        kind: undefined
    };

    // Several event paths below can report a failure; make sure the caller
    // only ever hears about the first outcome.
    var finished = false;
    function done(err) {
        if (finished) {
            return;
        }
        finished = true;
        cb(err);
    }

    var url = 'https://drive.google.com/timedtext?' + querystring.stringify(query);
    https.get(url, function (res) {
        if (res.statusCode !== 200) {
            // The body must still be consumed, otherwise the response keeps
            // holding its socket and memory.
            res.resume();
            return done(new Error(res.statusMessage));
        }

        var buf = '';
        res.setEncoding('utf-8');
        res.on('data', function (data) {
            buf += data;
        });

        res.on('error', done);

        res.on('end', function () {
            try {
                buf = exports.convert(buf);
            } catch (e) {
                return done(e);
            }

            fs.writeFile(file, buf, function (err) {
                if (err) {
                    done(err);
                } else {
                    LOGGER.info('Saved subtitle file ' + file);
                    done();
                }
            });
        });
    }).on('error', done);
}
/**
 * Scan the subtitle directory and delete every entry whose modification time
 * is more than ONE_DAY in the past. Failures to list, stat or remove are
 * logged and otherwise ignored.
 */
function clearOldSubtitles() {
    // Stat a single directory entry and unlink it if it is stale.
    const removeIfStale = (entry) => {
        fs.stat(path.join(subtitleDir, entry), (statErr, info) => {
            if (statErr) {
                LOGGER.error(statErr.stack);
                return;
            }

            const isStale = info.mtime.getTime() < Date.now() - ONE_DAY;
            if (!isStale) {
                return;
            }

            LOGGER.info('Deleting old subtitle file: ' + entry);
            fs.unlink(path.join(subtitleDir, entry), (unlinkErr) => {
                if (!unlinkErr) {
                    return;
                }
                LOGGER.error(
                    'Failed to remove file %s: %s',
                    entry,
                    unlinkErr.stack
                );
            });
        });
    };

    fs.readdir(subtitleDir, (listErr, entries) => {
        if (listErr) {
            LOGGER.error(listErr.stack);
            return;
        }

        for (let i = 0; i < entries.length; i++) {
            removeIfStale(entries[i]);
        }
    });
}
/**
 * Acquire the in-memory lock for `filename`, then schedule `cb`.
 *
 * When the lock is free it is taken at once and `cb` is scheduled with
 * setImmediate. Otherwise the lock is re-checked every 200 ms; it is taken
 * as soon as it is free, or unconditionally once the attempt counter
 * reaches 5, so a holder that never releases cannot block callers forever.
 *
 * @param {string} filename - key identifying the lock
 * @param {function()} cb - invoked asynchronously once the lock is held
 */
function takeSubtitleLock(filename, cb) {
    var isHeld = function () {
        return subtitleLock.hasOwnProperty(filename);
    };

    if (!isHeld()) {
        subtitleLock[filename] = true;
        return setImmediate(cb);
    }

    var attempts = 1;
    var timer = setInterval(function () {
        attempts += 1;
        if (isHeld() && attempts < 5) {
            // Still locked and retries remain: wait for the next tick.
            return;
        }

        subtitleLock[filename] = true;
        clearInterval(timer);
        setImmediate(cb);
    }, 200);
}
// Purge stale cached subtitles once when the module loads, and again every hour.
clearOldSubtitles();
setInterval(clearOldSubtitles, ONE_HOUR);