1 var cheerio = require('cheerio');
2 var https = require('https');
3 var fs = require('fs');
4 var path = require('path');
5 var querystring = require('querystring');
6 var crypto = require('crypto');
8 const LOGGER = require('@calzoneman/jsli')('google2vtt');
// NOTE(review): the enclosing function's declaration line is not visible in
// this excerpt. handleGetSubtitles calls md5(name) when building a cache file
// name, so these lines are presumably the body of that helper -- confirm.
// Creates an MD5 hasher via Node's crypto module.
11 var hash = crypto.createHash('md5');
// NOTE(review): no hash.update(...) call is visible between creating the hasher
// and taking the digest -- confirm the input is actually fed in.
// Returns the base64 digest with every '/' replaced by a space. The expression
// has no terminating semicolon on this line, so further chained calls may
// follow outside the visible lines.
13 return hash.digest('base64').replace(/\//g, ' ')
// Array.prototype.slice, borrowed to copy array-like results into real arrays.
var slice = [].slice;

// Directory holding the cached .vtt files: a sibling of this module's directory.
var subtitleDir = path.join(__dirname, '..', 'google-drive-subtitles');

// Per-file lock table, keyed by absolute subtitle path; a key is present while
// a request is working on that file.
var subtitleLock = {};

// Durations in milliseconds.
var ONE_HOUR = 3600 * 1000;
var ONE_DAY = ONE_HOUR * 24;
/**
 * Left-pads a value with a single '0' so that it is at least two characters.
 *
 * @param {number|string} n - value to pad; converted with String() first
 * @returns {string} the padded string (values of two or more characters are
 *     returned unchanged)
 */
function padZeros(n) {
    var s = String(n);
    return s.length < 2 ? '0' + s : s;
}

/**
 * Formats a time offset as a WebVTT-style timestamp: `mm:ss.ttt`, or
 * `hh:mm:ss.ttt` when the offset is one hour or more.
 *
 * @param {number} time - non-negative offset in seconds (may be fractional)
 * @returns {string} the formatted timestamp
 */
function formatTime(time) {
    // Round to whole milliseconds first. Rounding only the fractional part
    // (e.g. via toFixed) loses the carry when it rounds up to a full second:
    // 1.9996s must become 00:02.000, not 00:01.000.
    var totalMs = Math.round(time * 1000);
    var ms = totalMs % 1000;
    var totalSeconds = (totalMs - ms) / 1000;
    var seconds = totalSeconds % 60;
    var totalMinutes = (totalSeconds - seconds) / 60;
    var minutes = totalMinutes % 60;
    var hours = (totalMinutes - minutes) / 60;

    var list = [minutes, seconds];
    // The hours field is only emitted when it is non-zero.
    if (hours) {
        list.unshift(hours);
    }

    var fraction = String(ms);
    while (fraction.length < 3) {
        fraction = '0' + fraction;
    }

    return list.map(padZeros).join(':') + '.' + fraction;
}
/**
 * Prepares subtitle text for use as WebVTT cue text.
 *
 * Decodes the character references `&amp;`, `&lt;`, `&gt;`, `&quot;`,
 * `&#39;` and `&apos;` in a single pass (so `&amp;lt;` becomes the literal
 * text `&lt;` rather than `<`), then rewrites any `-->` as `--&gt;` because
 * that sequence is the cue-timing separator and must not appear in cue text.
 *
 * NOTE(review): as extracted, every replacement in this function mapped a
 * string to itself; the entity names appear to have been decoded away.
 * Decode-then-escape is the only reading in which the final `-->` step is
 * meaningful -- confirm against the upstream file.
 *
 * @param {string} text - raw cue text; null/undefined is treated as empty
 * @returns {string} the cleaned cue text
 */
function fixText(text) {
    if (text == null) {
        return '';
    }

    var entities = {
        'amp': '&',
        'lt': '<',
        'gt': '>',
        'quot': '"',
        '#39': "'",
        'apos': "'"
    };

    return String(text)
        .replace(/&(amp|lt|gt|quot|#39|apos);/g, function (match, name) {
            return entities[name];
        })
        .replace(/-->/g, '--&gt;');
}
55 exports.convert = function convertSubtitles(subtitles) {
56 var $ = cheerio.load(subtitles, { xmlMode: true });
57 var lines = slice.call($('transcript text').map(function (index, elem) {
58 var start = parseFloat(elem.attribs.start);
59 var end = start + parseFloat(elem.attribs.dur);
61 if (elem.children.length) {
62 text = elem.children[0].data;
67 var line = formatTime(start) + ' --> ' + formatTime(end);
68 line += '\n' + fixText(text) + '\n';
72 return 'WEBVTT\n\n' + lines.join('\n');
75 exports.attach = function setupRoutes(app) {
76 app.get('/gdvtt/:id/:lang/(:name)?.vtt', handleGetSubtitles);
/**
 * Route handler that serves a cached WebVTT file, fetching and caching it
 * first when it is not on disk yet.
 *
 * Responds 400 when `id`, `lang` or the `vid` query parameter is not a
 * string, or when `id`/`lang` contain a path separator or NUL; responds 500
 * when fetching the subtitles fails; otherwise sends the .vtt file.
 *
 * @param {Object} req - request; reads `params.id`, `params.lang`,
 *     `params.name` (optional) and `query.vid`
 * @param {Object} res - response; uses `sendStatus` and `sendFile`
 */
function handleGetSubtitles(req, res) {
    var id = req.params.id;
    var lang = req.params.lang;
    var name = req.params.name || '';
    var vid = req.query.vid;
    if (typeof vid !== 'string' || typeof id !== 'string' || typeof lang !== 'string') {
        return res.sendStatus(400);
    }

    // id and lang become part of a file name that is later written inside
    // subtitleDir. Route parameters may arrive URL-decoded, so a value such as
    // "../x" would let the resulting path escape the cache directory.
    var unsafe = /[\\\/\0]/;
    if (unsafe.test(id) || unsafe.test(lang)) {
        return res.sendStatus(400);
    }

    var file = [id, lang, md5(name)].join('_') + '.vtt';
    var fileAbsolute = path.join(subtitleDir, file);

    takeSubtitleLock(fileAbsolute, function () {
        // fs.access replaces the deprecated fs.exists; no error means the
        // cached file is already present.
        fs.access(fileAbsolute, function (accessErr) {
            if (!accessErr) {
                res.sendFile(file, { root: subtitleDir });
                delete subtitleLock[fileAbsolute];
                return;
            }

            fetchSubtitles(id, lang, name, vid, fileAbsolute, function (err) {
                // Release the lock whether or not the fetch succeeded.
                delete subtitleLock[fileAbsolute];
                if (err) {
                    LOGGER.error(err.stack);
                    return res.sendStatus(500);
                }

                res.sendFile(file, { root: subtitleDir });
            });
        });
    });
}
/**
 * Requests a subtitle track from Google Drive's timedtext endpoint over HTTPS,
 * converts the response body with exports.convert and writes it to `file`.
 *
 * NOTE(review): several statements of this function are not visible in this
 * excerpt (the construction of `query`, the declaration and accumulation of
 * `buf`, and most callback invocations). The comments describe only what the
 * visible lines show.
 *
 * @param id - presumably forwarded into the request query -- confirm
 * @param lang - presumably forwarded into the request query -- confirm
 * @param name - presumably forwarded into the request query -- confirm
 * @param vid - presumably forwarded into the request query -- confirm
 * @param file - destination path handed to fs.writeFile
 * @param cb - called with an Error when the response status is not 200; its
 *     other invocations are outside the visible lines
 */
110 function fetchSubtitles(id, lang, name, vid, file, cb) {
// `query` is serialized into the URL's query string; it is built outside the
// visible lines.
121 var url = 'https://drive.google.com/timedtext?' + querystring.stringify(query);
122 https.get(url, function (res) {
// Any status other than 200 is reported to the caller as an Error carrying
// the response's status message.
123 if (res.statusCode !== 200) {
124 return cb(new Error(res.statusMessage));
// Have the response stream deliver decoded strings.
128 res.setEncoding('utf-8');
129 res.on('data', function (data) {
133 res.on('end', function () {
// Replace the accumulated response text with its converted form.
135 buf = exports.convert(buf);
140 fs.writeFile(file, buf, function (err) {
144 LOGGER.info('Saved subtitle file ' + file);
// Handler for errors emitted by the request object returned from https.get.
149 }).on('error', function (err) {
/**
 * Scans subtitleDir and deletes files whose modification time is more than
 * ONE_DAY milliseconds in the past. Takes no arguments; outcomes are reported
 * through LOGGER only.
 *
 * NOTE(review): the conditions guarding the error logs, any early returns and
 * the closing lines are not visible in this excerpt.
 */
154 function clearOldSubtitles() {
155 fs.readdir(subtitleDir, function (err, files) {
// Logs the stack of the readdir error. NOTE(review): the enclosing `if` is not
// visible -- confirm this runs only when `err` is set.
157 LOGGER.error(err.stack);
// Each directory entry is stat'ed on its own.
161 files.forEach(function (file) {
162 fs.stat(path.join(subtitleDir, file), function (err, stats) {
// Logs the stack of the stat error. NOTE(review): guard not visible -- confirm.
164 LOGGER.error(err.stack);
// A file is stale when its mtime is older than now minus ONE_DAY.
168 if (stats.mtime.getTime() < Date.now() - ONE_DAY) {
169 LOGGER.info('Deleting old subtitle file: ' + file);
170 fs.unlink(path.join(subtitleDir, file), error => {
// Format string for the unlink-failure message; the logging call it belongs to
// and its arguments lie outside the visible lines.
173 'Failed to remove file %s: %s',
/**
 * Marks `filename` as locked in subtitleLock and then schedules `cb` via
 * setImmediate.
 *
 * @param filename - key used in the subtitleLock table
 * @param cb - invoked (with no arguments) once the lock has been taken
 *
 * NOTE(review): the declaration and increment of `tries`, the polling delay
 * and the closing lines are not visible in this excerpt.
 */
185 function takeSubtitleLock(filename, cb) {
// Fast path: nobody holds the lock, so take it and continue on the next tick.
186 if (!subtitleLock.hasOwnProperty(filename)) {
187 subtitleLock[filename] = true;
188 return setImmediate(cb);
// Slow path: poll on a timer until the lock can be taken.
192 var interval = setInterval(function () {
// Proceed when the key has been removed, or once `tries` reaches 5; in the
// latter case the lock is taken even though the key is still present.
194 if (!subtitleLock.hasOwnProperty(filename) || tries >= 5) {
195 subtitleLock[filename] = true;
196 clearInterval(interval);
197 return setImmediate(cb);
// Module-level side effect: run clearOldSubtitles every ONE_HOUR milliseconds.
// The first run happens one interval after this module is loaded.
202 setInterval(clearOldSubtitles, ONE_HOUR);