1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 * Code below is vtt.js the JS WebVTT implementation.
7 * Current source code can be found at http://github.com/mozilla/vtt.js
9 * Code taken from commit b89bfd06cd788a68c67e03f44561afe833db0849
12 * Copyright 2013 vtt.js Contributors
14 * Licensed under the Apache License, Version 2.0 (the "License");
15 * you may not use this file except in compliance with the License.
16 * You may obtain a copy of the License at
18 * http://www.apache.org/licenses/LICENSE-2.0
20 * Unless required by applicable law or agreed to in writing, software
21 * distributed under the License is distributed on an "AS IS" BASIS,
22 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
23 * See the License for the specific language governing permissions and
24 * limitations under the License.
27 import { XPCOMUtils } from "resource://gre/modules/XPCOMUtils.sys.mjs";
31 XPCOMUtils.defineLazyPreferenceGetter(lazy, "DEBUG_LOG",
32 "media.webvtt.debug.logging", false);
34 function LOG(message) {
36 dump("[vtt] " + message + "\n");
40 var _objCreate = Object.create || (function() {
43 if (arguments.length !== 1) {
44 throw new Error('Object.create shim only accepts one parameter.');
// Creates a new ParsingError object from an errorData object. The errorData
// object should have default code and message properties. The default message
// property can be overridden by passing in a message parameter; any falsy
// `message` (undefined, "", ...) falls back to errorData.message.
// See ParsingError.Errors below for acceptable errors.
//
// @param errorData : object providing the `code` and default `message`.
// @param message   : optional replacement for the default message.
function ParsingError(errorData, message) {
  this.name = "ParsingError";
  this.code = errorData.code;
  this.message = message || errorData.message;
}
60 ParsingError.prototype = _objCreate(Error.prototype);
61 ParsingError.prototype.constructor = ParsingError;
63 // ParsingError metadata for acceptable ParsingErrors.
64 ParsingError.Errors = {
67 message: "Malformed WebVTT signature."
71 message: "Malformed time stamp."
75 // See spec, https://w3c.github.io/webvtt/#collect-a-webvtt-timestamp.
76 function collectTimeStamp(input) {
77 function computeSeconds(h, m, s, f) {
78 if (m > 59 || s > 59) {
81 // The attribute of the milli-seconds can only be three digits.
85 return (h | 0) * 3600 + (m | 0) * 60 + (s | 0) + (f | 0) / 1000;
88 let timestamp = input.match(/^(\d+:)?(\d{2}):(\d{2})\.(\d+)/);
89 if (!timestamp || timestamp.length !== 5) {
93 let hours = timestamp[1]? timestamp[1].replace(":", "") : 0;
94 let minutes = timestamp[2];
95 let seconds = timestamp[3];
96 let milliSeconds = timestamp[4];
98 return computeSeconds(hours, minutes, seconds, milliSeconds);
// A settings object holds key/value pairs and will ignore anything but the first
// assignment to a specific key.
function Settings() {
  // Backing store for the key/value pairs. It is created through
  // _objCreate(null) so that, when that resolves to Object.create, the store
  // has no prototype and `k in this.values` only ever sees keys stored here.
  this.values = _objCreate(null);
}
107 Settings.prototype = {
108 set: function(k, v) {
113 // Return the value for a key, or a default value.
114 // If 'defaultKey' is passed then 'dflt' is assumed to be an object with
115 // a number of possible default values as properties where 'defaultKey' is
116 // the key of the property that will be chosen; otherwise it's assumed to be
118 get: function(k, dflt, defaultKey) {
120 return this.has(k) ? this.values[k] : dflt[defaultKey];
122 return this.has(k) ? this.values[k] : dflt;
124 // Check whether we have a value for a key.
126 return k in this.values;
128 // Accept a setting if its one of the given alternatives.
129 alt: function(k, v, a) {
130 for (let n = 0; n < a.length; ++n) {
138 // Accept a setting if its a valid digits value (int or float)
139 digitsValue: function(k, v) {
140 if (/^-0+(\.[0]*)?$/.test(v)) { // special case for -0.0
142 } else if (/^-?\d+(\.[\d]*)?$/.test(v)) {
143 this.set(k, parseFloat(v));
146 // Accept a setting if its a valid percentage.
147 percent: function(k, v) {
149 if ((m = v.match(/^([\d]{1,3})(\.[\d]*)?%$/))) {
151 if (v >= 0 && v <= 100) {
161 delete this.values[k];
166 // Helper function to parse input into groups separated by 'groupDelim', and
// interpret each group as a key/value pair separated by 'keyValueDelim'.
168 function parseOptions(input, callback, keyValueDelim, groupDelim) {
169 let groups = groupDelim ? input.split(groupDelim) : [input];
170 for (let i in groups) {
171 if (typeof groups[i] !== "string") {
174 let kv = groups[i].split(keyValueDelim);
175 if (kv.length !== 2) {
184 function parseCue(input, cue, regionList) {
185 // Remember the original input if we need to throw an error.
187 // 4.1 WebVTT timestamp
188 function consumeTimeStamp() {
189 let ts = collectTimeStamp(input);
191 throw new ParsingError(ParsingError.Errors.BadTimeStamp,
192 "Malformed timestamp: " + oInput);
194 // Remove time stamp from input.
195 input = input.replace(/^[^\s\uFFFDa-zA-Z-]+/, "");
199 // 4.4.2 WebVTT cue settings
200 function consumeCueSettings(input, cue) {
201 let settings = new Settings();
202 parseOptions(input, function (k, v) {
205 // Find the last region we parsed with the same region id.
206 for (let i = regionList.length - 1; i >= 0; i--) {
207 if (regionList[i].id === v) {
208 settings.set(k, regionList[i].region);
214 settings.alt(k, v, ["rl", "lr"]);
217 let vals = v.split(",");
219 settings.digitsValue(k, vals0);
220 settings.percent(k, vals0) ? settings.set("snapToLines", false) : null;
221 settings.alt(k, vals0, ["auto"]);
222 if (vals.length === 2) {
223 settings.alt("lineAlign", vals[1], ["start", "center", "end"]);
228 let vals = v.split(",");
229 if (settings.percent(k, vals[0])) {
230 if (vals.length === 2) {
231 if (!settings.alt("positionAlign", vals[1], ["line-left", "center", "line-right"])) {
232 // Remove the "position" value because the "positionAlign" is not expected value.
233 // It will be set to default value below.
241 settings.percent(k, v);
244 settings.alt(k, v, ["start", "center", "end", "left", "right"]);
247 }, /:/, /\t|\n|\f|\r| /); // groupDelim is ASCII whitespace
249 // Apply default values for any missing fields.
250 // https://w3c.github.io/webvtt/#collect-a-webvtt-block step 11.4.1.3
251 cue.region = settings.get("region", null);
252 cue.vertical = settings.get("vertical", "");
253 cue.line = settings.get("line", "auto");
254 cue.lineAlign = settings.get("lineAlign", "start");
255 cue.snapToLines = settings.get("snapToLines", true);
256 cue.size = settings.get("size", 100);
257 cue.align = settings.get("align", "center");
258 cue.position = settings.get("position", "auto");
259 cue.positionAlign = settings.get("positionAlign", "auto");
262 function skipWhitespace() {
263 input = input.replace(/^[ \f\n\r\t]+/, "");
266 // 4.1 WebVTT cue timings.
268 cue.startTime = consumeTimeStamp(); // (1) collect cue start time
270 if (input.substr(0, 3) !== "-->") { // (3) next characters must match "-->"
271 throw new ParsingError(ParsingError.Errors.BadTimeStamp,
272 "Malformed time stamp (time stamps must be separated by '-->'): " +
275 input = input.substr(3);
277 cue.endTime = consumeTimeStamp(); // (5) collect cue end time
279 // 4.1 WebVTT cue settings list.
281 consumeCueSettings(input, cue);
// Returns true when `input` is the empty string or consists solely of the
// characters space, form feed, line feed, carriage return and tab.
// The loose `==` comparison is kept so that non-string inputs are treated
// exactly as before.
function emptyOrOnlyContainsWhiteSpaces(input) {
  return input == "" || /^[ \f\n\r\t]+$/.test(input);
}
// Returns true when `input` contains the "-->" arrow that separates the start
// and end time stamps of a cue timing line.
function containsTimeDirectionSymbol(input) {
  return input.includes("-->");
}
// Returns true when `input` starts (after optional whitespace) with something
// shaped like "<timestamp> --> <timestamp>", where each timestamp is
// [h+:]mm:ss.f+ . Only the shape is checked; nothing after the second
// timestamp is inspected, and the numeric ranges are not validated here.
function maybeIsTimeStampFormat(input) {
  return /^\s*(\d+:)?(\d{2}):(\d{2})\.(\d+)\s*-->\s*(\d+:)?(\d{2}):(\d{2})\.(\d+)\s*/.test(input);
}
316 var TAG_ANNOTATION = {
325 const PARSE_CONTENT_MODE = {
326 NORMAL_CUE: "normal_cue",
327 DOCUMENT_FRAGMENT: "document_fragment",
328 REGION_CUE: "region_cue",
330 // Parse content into a document fragment.
331 function parseContent(window, input, mode) {
332 function nextToken() {
333 // Check for end-of-string.
338 // Consume 'n' characters from the input.
339 function consume(result) {
340 input = input.substr(result.length);
344 let m = input.match(/^([^<]*)(<[^>]+>?)?/);
345 // The input doesn't contain a complete tag.
349 // If there is some text before the next tag, return it, otherwise return
351 return consume(m[1] ? m[1] : m[2]);
354 const unescapeHelper = window.document.createElement("div");
355 function unescapeEntities(s) {
358 // Decimal numeric character reference
359 s = s.replace(/&#(\d+);?/g, (candidate, number) => {
361 const codepoint = parseInt(number);
362 return String.fromCodePoint(codepoint);
368 // Hexadecimal numeric character reference
369 s = s.replace(/&#x([\dA-Fa-f]+);?/g, (candidate, number) => {
371 const codepoint = parseInt(number, 16);
372 return String.fromCodePoint(codepoint);
378 // Named character references
379 s = s.replace(/&\w[\w\d]*;?/g, candidate => {
380 // The list of entities is huge, so we use innerHTML instead.
381 // We should probably use setHTML instead once that is available (bug 1650370).
382 // Ideally we would be able to use a faster/simpler variant of setHTML (bug 1731215).
383 unescapeHelper.innerHTML = candidate;
384 const unescaped = unescapeHelper.innerText;
385 if (unescaped == candidate) { // not a valid entity
390 unescapeHelper.innerHTML = "";
// Decide whether `element` may be appended under `current`. Elements whose
// localName has no NEEDS_PARENT entry are always accepted; the others are
// accepted only when that entry equals the localName of `current`.
function shouldAdd(current, element) {
  const requiredParent = NEEDS_PARENT[element.localName];
  return !requiredParent || requiredParent === current.localName;
}
400 // Create an element for this tag.
401 function createElement(type, annotation) {
402 let tagName = TAG_NAME[type];
406 let element = window.document.createElement(tagName);
407 let name = TAG_ANNOTATION[type];
409 element[name] = annotation ? annotation.trim() : "";
414 // https://w3c.github.io/webvtt/#webvtt-timestamp-object
415 // Return hhhhh:mm:ss.fff
416 function normalizedTimeStamp(secondsWithFrag) {
417 let totalsec = parseInt(secondsWithFrag, 10);
418 let hours = Math.floor(totalsec / 3600);
419 let minutes = Math.floor(totalsec % 3600 / 60);
420 let seconds = Math.floor(totalsec % 60);
425 minutes = "0" + minutes;
428 seconds = "0" + seconds;
430 let f = secondsWithFrag.toString().split(".");
432 f = f[1].slice(0, 3).padEnd(3, "0");
436 return hours + ':' + minutes + ':' + seconds + '.' + f;
441 case PARSE_CONTENT_MODE.NORMAL_CUE:
442 root = window.document.createElement("span", {pseudo: "::cue"});
444 case PARSE_CONTENT_MODE.REGION_CUE:
445 root = window.document.createElement("span");
447 case PARSE_CONTENT_MODE.DOCUMENT_FRAGMENT:
448 root = window.document.createDocumentFragment();
453 root.appendChild(window.document.createTextNode(""));
461 while ((t = nextToken()) !== null) {
464 const endTag = t.slice(2, -1);
465 const stackEnd = tagStack.at(-1);
467 // If the closing tag matches, move back up to the parent node.
468 if (stackEnd == endTag) {
470 current = current.parentNode;
472 // If the closing tag is <ruby> and we're at an <rt>, move back up to
473 // the <ruby>'s parent node.
474 } else if (endTag == "ruby" && current.nodeName == "RT") {
476 current = current.parentNode.parentNode;
479 // Otherwise just ignore the end tag.
482 let ts = collectTimeStamp(t.substr(1, t.length - 1));
// Timestamps are leaf nodes as well.
486 node = window.document.createProcessingInstruction("timestamp", normalizedTimeStamp(ts));
487 current.appendChild(node);
490 let m = t.match(/^<([^.\s/0-9>]+)(\.[^\s\\>]+)?([^>\\]+)?(\\?)>?$/);
491 // If we can't parse the tag, skip to the next tag.
495 // Try to construct an element, and ignore the tag if we couldn't.
496 node = createElement(m[1], m[3]);
500 // Determine if the tag should be added based on the context of where it
501 // is placed in the cuetext.
502 if (!shouldAdd(current, node)) {
505 // Set the class list (as a list of classes, separated by space).
507 node.className = m[2].substr(1).replace('.', ' ');
509 // Append the node to the current node, and enter the scope of the new
512 current.appendChild(node);
517 // Text nodes are leaf nodes.
518 current.appendChild(window.document.createTextNode(unescapeEntities(t)));
// Prototype-based base for style boxes. The constructor itself sets nothing
// up; users are expected to provide `this.div` before relying on the default
// target of applyStyles.
function StyleBox() {
}

// Apply styles to a div. If there is no div passed then it defaults to
// `this.div`. Only the own enumerable properties of `styles` are copied onto
// `div.style`; inherited properties are skipped.
StyleBox.prototype.applyStyles = function(styles, div) {
  div = div || this.div;
  for (let prop in styles) {
    // Object.hasOwn also works for style maps created without a prototype.
    if (Object.hasOwn(styles, prop)) {
      div.style[prop] = styles[prop];
    }
  }
};

// Format a CSS length: a value of exactly 0 is returned as the number 0,
// anything else is returned as `val` followed by `unit`.
StyleBox.prototype.formatStyle = function(val, unit) {
  return val === 0 ? 0 : val + unit;
};
// TODO(alwu): remove StyleBox and change other style box to class-based.
// Class-based counterpart of StyleBox, used as the base of CueStyleBox.
class StyleBoxBase {
  // Copy every own enumerable property of `styles` onto `div.style`. When no
  // div is passed, `this.div` is used.
  applyStyles(styles, div) {
    div = div || this.div;
    Object.assign(div.style, styles);
  }

  // Format a CSS length: a value of exactly 0 is returned as the number 0,
  // anything else is returned as `val` followed by `unit`.
  formatStyle(val, unit) {
    return val === 0 ? 0 : val + unit;
  }
}
554 // Constructs the computed display state of the cue (a div). Places the div
555 // into the overlay which should be a block level element (usually a div).
556 class CueStyleBox extends StyleBoxBase {
557 constructor(window, cue, containerBox) {
560 this.div = window.document.createElement("div");
561 this.cueDiv = parseContent(window, cue.text, PARSE_CONTENT_MODE.NORMAL_CUE);
562 this.div.appendChild(this.cueDiv);
564 this.containerHeight = containerBox.height;
565 this.containerWidth = containerBox.width;
566 this.fontSize = this._getFontSize(containerBox);
567 this.isCueStyleBox = true;
569 // As pseudo element won't inherit the parent div's style, so we have to
570 // set the font size explicitly.
571 this._applyDefaultStylesOnBackgroundNode();
572 this._applyDefaultStylesOnRootNode();
575 getCueBoxPositionAndSize() {
576 // As `top`, `left`, `width` and `height` are all represented by the
577 // percentage of the container, we need to convert them to the actual
578 // number according to the container's size.
579 const isWritingDirectionHorizontal = this.cue.vertical == "";
581 this.containerHeight * this._tranferPercentageToFloat(this.div.style.top),
583 this.containerWidth * this._tranferPercentageToFloat(this.div.style.left),
584 width = isWritingDirectionHorizontal ?
585 this.containerWidth * this._tranferPercentageToFloat(this.div.style.width) :
586 this.div.clientWidthDouble,
587 height = isWritingDirectionHorizontal ?
588 this.div.clientHeightDouble :
589 this.containerHeight * this._tranferPercentageToFloat(this.div.style.height);
590 return { top, left, width, height };
593 getFirstLineBoxSize() {
594 // This size would be automatically adjusted by writing direction. When
595 // direction is horizontal, it represents box's height. When direction is
596 // vertical, it represents box's width.
597 return this.div.firstLineBoxBSize;
601 // This function is a workaround which is used to force the reflow in order
602 // to use the correct alignment for bidi text. Now this function would be
603 // called after calculating the final position of the cue box to ensure the
604 // rendering result is correct. See bug1557882 comment3 for more details.
// TODO : remove this function and set `unicode-bidi` when initializing
606 // the CueStyleBox, after fixing bug1558431.
607 this.applyStyles({ "unicode-bidi": "plaintext" });
611 * Following methods are private functions, should not use them outside this
614 _tranferPercentageToFloat(input) {
615 return input.replace("%", "") / 100.0;
618 _getFontSize(containerBox) {
619 // In https://www.w3.org/TR/webvtt1/#applying-css-properties, the spec
620 // said the font size is '5vh', which means 5% of the viewport height.
621 // However, if we use 'vh' as a basic unit, it would eventually become
622 // 5% of screen height, instead of video's viewport height. Therefore, we
623 // have to use 'px' here to make sure we have the correct font size.
624 return containerBox.height * 0.05 + "px";
627 _applyDefaultStylesOnBackgroundNode() {
628 // most of the properties have been defined in `::cue` in `html.css`, but
629 // there are some css properties we have to set them dynamically.
630 // FIXME(emilio): These are observable by content. Ideally the style
631 // attribute will work like for ::part() and we wouldn't need this.
632 this.cueDiv.style.setProperty("--cue-font-size", this.fontSize, "important");
633 this.cueDiv.style.setProperty("--cue-writing-mode", this._getCueWritingMode(), "important");
636 // spec https://www.w3.org/TR/webvtt1/#applying-css-properties
637 _applyDefaultStylesOnRootNode() {
638 // The variables writing-mode, top, left, width, and height are calculated
639 // in the spec 7.2, https://www.w3.org/TR/webvtt1/#processing-cue-settings
640 // spec 7.2.1, calculate 'writing-mode'.
641 const writingMode = this._getCueWritingMode();
643 // spec 7.2.2 ~ 7.2.7, calculate 'width', 'height', 'left' and 'top'.
644 const {width, height, left, top} = this._getCueSizeAndPosition();
647 "position": "absolute",
648 // "unicode-bidi": "plaintext", (uncomment this line after fixing bug1558431)
649 "writing-mode": writingMode,
654 "overflow-wrap": "break-word",
655 // "text-wrap": "balance", (we haven't supported this CSS attribute yet)
656 "white-space": "pre-line",
657 "font": this.fontSize + " sans-serif",
658 "color": "rgba(255, 255, 255, 1)",
659 "white-space": "pre-line",
660 "text-align": this.cue.align,
664 _getCueWritingMode() {
665 const cue = this.cue;
666 if (cue.vertical == "") {
667 return "horizontal-tb";
669 return cue.vertical == "lr" ? "vertical-lr" : "vertical-rl";
672 _getCueSizeAndPosition() {
673 const cue = this.cue;
674 // spec 7.2.2, determine the value of maximum size for cue as per the
675 // appropriate rules from the following list.
677 let computedPosition = cue.computedPosition;
678 switch (cue.computedPositionAlign) {
680 maximumSize = 100 - computedPosition;
683 maximumSize = computedPosition;
686 maximumSize = computedPosition <= 50 ?
687 computedPosition * 2 : (100 - computedPosition) * 2;
690 const size = Math.min(cue.size, maximumSize);
692 // spec 7.2.5, determine the value of x-position or y-position for cue as
693 // per the appropriate rules from the following list.
694 let xPosition = 0.0, yPosition = 0.0;
695 const isWritingDirectionHorizontal = cue.vertical == "";
696 switch (cue.computedPositionAlign) {
698 if (isWritingDirectionHorizontal) {
699 xPosition = cue.computedPosition;
701 yPosition = cue.computedPosition;
705 if (isWritingDirectionHorizontal) {
706 xPosition = cue.computedPosition - (size / 2);
708 yPosition = cue.computedPosition - (size / 2);
712 if (isWritingDirectionHorizontal) {
713 xPosition = cue.computedPosition - size;
715 yPosition = cue.computedPosition - size;
720 // spec 7.2.6, determine the value of whichever of x-position or
721 // y-position is not yet calculated for cue as per the appropriate rules
722 // from the following list.
723 if (!cue.snapToLines) {
724 if (isWritingDirectionHorizontal) {
725 yPosition = cue.computedLine;
727 xPosition = cue.computedLine;
730 if (isWritingDirectionHorizontal) {
737 left: xPosition + "%",
738 top: yPosition + "%",
739 width: isWritingDirectionHorizontal ? size + "%" : "auto",
740 height: isWritingDirectionHorizontal ? "auto" : size + "%",
745 function RegionNodeBox(window, region, container) {
748 let boxLineHeight = container.height * 0.0533 // 0.0533vh ? 5.33vh
749 let boxHeight = boxLineHeight * region.lines;
750 let boxWidth = container.width * region.width / 100; // convert percentage to px
752 let regionNodeStyles = {
753 position: "absolute",
754 height: boxHeight + "px",
755 width: boxWidth + "px",
756 top: (region.viewportAnchorY * container.height / 100) - (region.regionAnchorY * boxHeight / 100) + "px",
757 left: (region.viewportAnchorX * container.width / 100) - (region.regionAnchorX * boxWidth / 100) + "px",
758 lineHeight: boxLineHeight + "px",
759 writingMode: "horizontal-tb",
760 backgroundColor: "rgba(0, 0, 0, 0.8)",
761 wordWrap: "break-word",
762 overflowWrap: "break-word",
763 font: (boxLineHeight/1.3) + "px sans-serif",
764 color: "rgba(255, 255, 255, 1)",
767 maxHeight: boxHeight + "px",
768 display: "inline-flex",
770 justifyContent: "flex-end",
773 this.div = window.document.createElement("div");
774 this.div.id = region.id; // useless?
775 this.applyStyles(regionNodeStyles);
777 RegionNodeBox.prototype = _objCreate(StyleBox.prototype);
778 RegionNodeBox.prototype.constructor = RegionNodeBox;
780 function RegionCueStyleBox(window, cue) {
782 this.cueDiv = parseContent(window, cue.text, PARSE_CONTENT_MODE.REGION_CUE);
784 let regionCueStyles = {
785 position: "relative",
786 writingMode: "horizontal-tb",
787 unicodeBidi: "plaintext",
790 textAlign: cue.align,
792 // TODO: fix me, LTR and RTL ? using margin replace the "left/right"
794 let offset = cue.computedPosition * cue.region.width / 100;
799 regionCueStyles.left = offset + "%";
800 regionCueStyles.right = "auto";
804 regionCueStyles.left = "auto";
805 regionCueStyles.right = offset + "%";
811 this.div = window.document.createElement("div");
812 this.applyStyles(regionCueStyles);
813 this.div.appendChild(this.cueDiv);
815 RegionCueStyleBox.prototype = _objCreate(StyleBox.prototype);
816 RegionCueStyleBox.prototype.constructor = RegionCueStyleBox;
818 // Represents the co-ordinates of an Element in a way that we can easily
819 // compute things with such as if it overlaps or intersects with other boxes.
822 // Get dimensions by calling getCueBoxPositionAndSize on a CueStyleBox, by
823 // getting offset properties from an HTMLElement (from the object or its
824 // `div` property), otherwise look at the regular box properties on the
826 const isHTMLElement = !obj.isCueStyleBox && (obj.div || obj.tagName);
827 obj = obj.isCueStyleBox ? obj.getCueBoxPositionAndSize() : obj.div || obj;
828 this.top = isHTMLElement ? obj.offsetTop : obj.top;
829 this.left = isHTMLElement ? obj.offsetLeft : obj.left;
830 this.width = isHTMLElement ? obj.offsetWidth : obj.width;
831 this.height = isHTMLElement ? obj.offsetHeight : obj.height;
832 // This value is smaller than 1 app unit (~= 0.0166 px).
837 return this.top + this.height;
841 return this.left + this.width;
844 // This function is used for debugging, it will return the box's information.
845 getBoxInfoInChars() {
846 return `top=${this.top}, bottom=${this.bottom}, left=${this.left}, ` +
847 `right=${this.right}, width=${this.width}, height=${this.height}`;
850 // Move the box along a particular axis. Optionally pass in an amount to move
851 // the box. If no amount is passed then the default is the line height of the
856 LOG(`box's left moved from ${this.left} to ${this.left + toMove}`);
860 LOG(`box's left moved from ${this.left} to ${this.left - toMove}`);
864 LOG(`box's top moved from ${this.top} to ${this.top + toMove}`);
868 LOG(`box's top moved from ${this.top} to ${this.top - toMove}`);
874 // Check if this box overlaps another box, b2.
876 return (this.left < b2.right - this.fuzz) &&
877 (this.right > b2.left + this.fuzz) &&
878 (this.top < b2.bottom - this.fuzz) &&
879 (this.bottom > b2.top + this.fuzz);
882 // Check if this box overlaps any other boxes in boxes.
884 for (let i = 0; i < boxes.length; i++) {
885 if (this.overlaps(boxes[i])) {
892 // Check if this box is within another box.
894 return (this.top >= container.top - this.fuzz) &&
895 (this.bottom <= container.bottom + this.fuzz) &&
896 (this.left >= container.left - this.fuzz) &&
897 (this.right <= container.right + this.fuzz);
// Check whether this box has passed over the specific axis boundary. The axis
901 // is based on the canvas coordinates, the `+x` is rightward and `+y` is
903 isOutsideTheAxisBoundary(container, axis) {
906 return this.right > container.right + this.fuzz;
908 return this.left < container.left - this.fuzz;
910 return this.bottom > container.bottom + this.fuzz;
912 return this.top < container.top - this.fuzz;
916 // Find the percentage of the area that this box is overlapping with another
918 intersectPercentage(b2) {
919 let x = Math.max(0, Math.min(this.right, b2.right) - Math.max(this.left, b2.left)),
920 y = Math.max(0, Math.min(this.bottom, b2.bottom) - Math.max(this.top, b2.top)),
921 intersectArea = x * y;
922 return intersectArea / (this.height * this.width);
// Return a new BoxPosition constructed from this one. The copy is independent
// of the original, so callers can move it or keep it as a remembered position
// without affecting this box.
BoxPosition.prototype.clone = function() {
  return new BoxPosition(this);
};
930 function adjustBoxPosition(styleBox, containerBox, controlBarBox, outputBoxes) {
931 const cue = styleBox.cue;
932 const isWritingDirectionHorizontal = cue.vertical == "";
933 let box = new BoxPosition(styleBox);
934 if (!box.width || !box.height) {
935 LOG(`No way to adjust a box with zero width or height.`);
939 // Spec 7.2.10, adjust the positions of boxes according to the appropriate
940 // steps from the following list. Also, we use offsetHeight/offsetWidth here
941 // in order to prevent the incorrect positioning caused by CSS transform
943 const fullDimension = isWritingDirectionHorizontal ?
944 containerBox.height : containerBox.width;
945 if (cue.snapToLines) {
946 LOG(`Adjust position when 'snap-to-lines' is true.`);
947 // The step is the height or width of the line box. We should use font
948 // size directly, instead of using text box's width or height, because the
949 // width or height of the box would be changed when the text is wrapped to
950 // different line. Ex. if text is wrapped to two line, the height or width
951 // of the box would become 2 times of font size.
952 let step = styleBox.getFirstLineBoxSize();
957 // spec 7.2.10.4 ~ 7.2.10.6
958 let line = Math.floor(cue.computedLine + 0.5);
959 if (cue.vertical == "rl") {
960 line = -1 * (line + 1);
963 // spec 7.2.10.7 ~ 7.2.10.8
964 let position = step * line;
965 if (cue.vertical == "rl") {
966 position = position - box.width + step;
971 position += fullDimension;
975 // spec 7.2.10.10, move the box to the specific position along the direction.
976 const movingDirection = isWritingDirectionHorizontal ? "+y" : "+x";
977 box.move(movingDirection, position);
979 // spec 7.2.10.11, remember the position as specified position.
980 let specifiedPosition = box.clone();
982 // spec 7.2.10.12, let title area be a box that covers all of the video’s
984 const titleAreaBox = containerBox.clone();
986 titleAreaBox.height -= controlBarBox.height;
989 function isBoxOutsideTheRenderingArea() {
990 if (isWritingDirectionHorizontal) {
991 // the top side of the box is above the rendering area, or the bottom
992 // side of the box is below the rendering area.
993 return step < 0 && box.top < 0 ||
994 step > 0 && box.bottom > fullDimension;
996 // the left side of the box is outside the left side of the rendering
997 // area, or the right side of the box is outside the right side of the
999 return step < 0 && box.left < 0 ||
1000 step > 0 && box.right > fullDimension;
1003 // spec 7.2.10.13, if none of the boxes in boxes would overlap any of the
1004 // boxes in output, and all of the boxes in boxes are entirely within the
1006 let switched = false;
1007 while (!box.within(titleAreaBox) || box.overlapsAny(outputBoxes)) {
1008 // spec 7.2.10.14, check if we need to switch the direction.
1009 if (isBoxOutsideTheRenderingArea()) {
1010 // spec 7.2.10.17, if `switched` is true, remove all the boxes in
1011 // `boxes`, which means we shouldn't apply any CSS boxes for this cue.
1012 // Therefore, returns null box.
1016 // spec 7.2.10.18 ~ 7.2.10.20
1018 box = specifiedPosition.clone();
1021 // spec 7.2.10.15, moving box along the specific direction.
1022 box.move(movingDirection, step);
1025 if (isWritingDirectionHorizontal) {
1026 styleBox.applyStyles({
1027 top: getPercentagePosition(box.top, fullDimension),
1030 styleBox.applyStyles({
1031 left: getPercentagePosition(box.left, fullDimension),
1035 LOG(`Adjust position when 'snap-to-lines' is false.`);
// (snap-to-lines is false) spec 7.2.10.1 ~ 7.2.10.2
1037 if (cue.lineAlign != "start") {
1038 const isCenterAlign = cue.lineAlign == "center";
1039 const movingDirection = isWritingDirectionHorizontal ? "-y" : "-x";
1040 if (isWritingDirectionHorizontal) {
1041 box.move(movingDirection, isCenterAlign ? box.height : box.height / 2);
1043 box.move(movingDirection, isCenterAlign ? box.width : box.width / 2);
1048 let bestPosition = {},
1049 specifiedPosition = box.clone(),
1050 outsideAreaPercentage = 1; // Highest possible so the first thing we get is better.
1051 let hasFoundBestPosition = false;
// For the different writing directions, we should have different priority
// for the moving direction. For example, if the writing direction is
// horizontal, which means the cues will grow from the top to the bottom,
// then moving cues along the `y` axis should be more important than moving
// cues along the `x` axis, and vice versa for those cues growing from the
// left to right, or from the right to the left. We don't follow the exact
// way which the spec requires, see the reason in bug1575460.
//
// @param writingDirection : the cue's `vertical` value ("", "lr", or
//                           anything else, which is treated as "rl").
// @return the four axis names ordered by priority.
function getAxis(writingDirection) {
  if (writingDirection == "") {
    return ["+y", "-y", "+x", "-x"];
  }
  // Growing from left to right.
  if (writingDirection == "lr") {
    return ["+x", "-x", "+y", "-y"];
  }
  // Growing from right to left.
  return ["-x", "+x", "+y", "-y"];
}
1071 const axis = getAxis(cue.vertical);
// This factor affects the granularity of the moving unit. Using factor=1
// often moves too much and results in too many redundant spaces
// between boxes. So we can increase the factor to slightly reduce the
// move we do every time, but can still preserve the reasonable spaces
1079 const toMove = styleBox.getFirstLineBoxSize() / factor;
1080 for (let i = 0; i < axis.length && !hasFoundBestPosition; i++) {
1081 while (!box.isOutsideTheAxisBoundary(containerBox, axis[i]) &&
1082 (!box.within(containerBox) || box.overlapsAny(outputBoxes))) {
1083 box.move(axis[i], toMove);
1085 // We found a spot where we aren't overlapping anything. This is our
1087 if (box.within(containerBox)) {
1088 bestPosition = box.clone();
1089 hasFoundBestPosition = true;
1092 let p = box.intersectPercentage(containerBox);
// If we're outside the container box less than we were on our last try
1094 // then remember this position as the best position.
1095 if (outsideAreaPercentage > p) {
1096 bestPosition = box.clone();
1097 outsideAreaPercentage = p;
1099 // Reset the box position to the specified position.
1100 box = specifiedPosition.clone();
1103 // Can not find a place to place this box inside the rendering area.
1104 if (!box.within(containerBox)) {
1108 styleBox.applyStyles({
1109 top: getPercentagePosition(box.top, containerBox.height),
1110 left: getPercentagePosition(box.left, containerBox.width),
// To avoid being affected by CSS scale, positions are expressed in '%' so
// that the cue sticks to the right position.
//
// @param position      : offset along one axis.
// @param fullDimension : full size of the container along the same axis.
// @return `position` as a percentage of `fullDimension`, suffixed with "%".
function getPercentagePosition(position, fullDimension) {
  return (position / fullDimension) * 100 + "%";
}
1123 export function WebVTT() {
1124 this.isProcessingCues = false;
1128 // Helper to allow strings to be decoded instead of the default binary utf8 data.
1129 WebVTT.StringDecoder = function() {
1131 decode: function(data) {
1135 if (typeof data !== "string") {
1136 throw new Error("Error - expected string data.");
1138 return decodeURIComponent(encodeURIComponent(data));
1143 WebVTT.convertCueToDOMTree = function(window, cuetext) {
1147 return parseContent(window, cuetext, PARSE_CONTENT_MODE.DOCUMENT_FRAGMENT);
// Remove every child node from `overlay`, one at a time, until it has no
// first child left.
function clearAllCuesDiv(overlay) {
  while (overlay.firstChild) {
    overlay.firstChild.remove();
  }
}
1156 // It's used to record how many cues we process in the last `processCues` run.
1157 var lastDisplayedCueNums = 0;
1159 const DIV_COMPUTING_STATE = {
1161 REUSE_AND_CLEAR : 1,
1162 COMPUTE_AND_CLEAR : 2
1165 // Runs the processing model over the cues and regions passed to it.
1166 // Spec https://www.w3.org/TR/webvtt1/#processing-model
// @param window : JS window
1168 // @param cues : the VTT cues are going to be displayed.
1169 // @param overlay : A block level element (usually a div) that the computed cues
1170 // and regions will be placed into.
1171 // @param controls : A Control bar element. Cues' position will be
1172 // affected and repositioned according to it.
1173 function processCuesInternal(window, cues, overlay, controls) {
1174 LOG(`=== processCues ===`);
1176 LOG(`clear display and abort processing because of no cue.`);
1177 clearAllCuesDiv(overlay);
1178 lastDisplayedCueNums = 0;
1182 let controlBar, controlBarShown;
1184 // controls is a <div> that is the children of the UA Widget Shadow Root.
1185 controlBar = controls.parentNode.getElementById("controlBar");
1186 controlBarShown = controlBar ? !controlBar.hidden : false;
1188 // There is no controls element. This only happen to UA Widget because
1189 // it is created lazily.
1190 controlBarShown = false;
1194 * This function is used to tell us if we have to recompute or reuse current
1195 * cue's display state. Display state is a DIV element with corresponding
1196 * CSS style to display cue on the screen. When the cue is being displayed
1197 * first time, we will compute its display state. After that, we could reuse
1198 * its state until following conditions happen.
1199 * (1) control changes : it means the rendering area changes so we should
1200 * recompute cues' position.
1201 * (2) cue's `hasBeenReset` flag is true : it means cues' line or position
1202 * property has been modified, we also need to recompute cues' position.
1203 * (3) the amount of showing cues changes : it means some cue would disappear
1204 * but other cues should stay at the same place without recomputing, so we
1205 * can resume their display state.
1207 function getDIVComputingState(cues) {
1208 if (overlay.lastControlBarShownStatus != controlBarShown) {
1209 return DIV_COMPUTING_STATE.COMPUTE_AND_CLEAR;
1212 for (let i = 0; i < cues.length; i++) {
1213 if (cues[i].hasBeenReset || !cues[i].displayState) {
1214 return DIV_COMPUTING_STATE.COMPUTE_AND_CLEAR;
1218 if (lastDisplayedCueNums != cues.length) {
1219 return DIV_COMPUTING_STATE.REUSE_AND_CLEAR;
1221 return DIV_COMPUTING_STATE.REUSE;
1224 const divState = getDIVComputingState(cues);
1225 overlay.lastControlBarShownStatus = controlBarShown;
1227 if (divState == DIV_COMPUTING_STATE.REUSE) {
1228 LOG(`reuse current cue's display state and abort processing`);
1232 clearAllCuesDiv(overlay);
1233 let rootOfCues = window.document.createElement("div");
1234 rootOfCues.style.position = "absolute";
1235 rootOfCues.style.left = "0";
1236 rootOfCues.style.right = "0";
1237 rootOfCues.style.top = "0";
1238 rootOfCues.style.bottom = "0";
1239 overlay.appendChild(rootOfCues);
1241 if (divState == DIV_COMPUTING_STATE.REUSE_AND_CLEAR) {
1242 LOG(`clear display but reuse cues' display state.`);
1243 for (let cue of cues) {
1244 rootOfCues.appendChild(cue.displayState);
1246 } else if (divState == DIV_COMPUTING_STATE.COMPUTE_AND_CLEAR) {
1247 LOG(`clear display and recompute cues' display state.`);
1248 let boxPositions = [],
1249 containerBox = new BoxPosition(rootOfCues);
1251 let styleBox, cue, controlBarBox;
1252 if (controlBarShown) {
1253 controlBarBox = new BoxPosition(controlBar);
1254 // Add an empty output box that cover the same region as video control bar.
1255 boxPositions.push(controlBarBox);
1258 // https://w3c.github.io/webvtt/#processing-model 6.1.12.1
1259 // Create regionNode
1260 let regionNodeBoxes = {};
1263 LOG(`lastDisplayedCueNums=${lastDisplayedCueNums}, currentCueNums=${cues.length}`);
1264 lastDisplayedCueNums = cues.length;
1265 for (let i = 0; i < cues.length; i++) {
1267 if (cue.region != null) {
1269 styleBox = new RegionCueStyleBox(window, cue);
1271 if (!regionNodeBoxes[cue.region.id]) {
1272 // create regionNode
1273 // Adjust the container hieght to exclude the controlBar
1274 let adjustContainerBox = new BoxPosition(rootOfCues);
1275 if (controlBarShown) {
1276 adjustContainerBox.height -= controlBarBox.height;
1277 adjustContainerBox.bottom += controlBarBox.height;
1279 regionNodeBox = new RegionNodeBox(window, cue.region, adjustContainerBox);
1280 regionNodeBoxes[cue.region.id] = regionNodeBox;
1283 let currentRegionBox = regionNodeBoxes[cue.region.id];
1284 let currentRegionNodeDiv = currentRegionBox.div;
1286 // TODO: fix me, it looks like the we need to set/change "top" attribute at the styleBox.div
1287 // to do the "scroll up", however, we do not implement it yet?
1288 if (cue.region.scroll == "up" && currentRegionNodeDiv.childElementCount > 0) {
1289 styleBox.div.style.transitionProperty = "top";
1290 styleBox.div.style.transitionDuration = "0.433s";
1293 currentRegionNodeDiv.appendChild(styleBox.div);
1294 rootOfCues.appendChild(currentRegionNodeDiv);
1295 cue.displayState = styleBox.div;
1296 boxPositions.push(new BoxPosition(currentRegionBox));
1298 // Compute the intial position and styles of the cue div.
1299 styleBox = new CueStyleBox(window, cue, containerBox);
1300 rootOfCues.appendChild(styleBox.div);
1302 // Move the cue to correct position, we might get the null box if the
1303 // result of algorithm doesn't want us to show the cue when we don't
1304 // have any room for this cue.
1305 let cueBox = adjustBoxPosition(styleBox, containerBox, controlBarBox, boxPositions);
1307 styleBox.setBidiRule();
1308 // Remember the computed div so that we don't have to recompute it later
1309 // if we don't have too.
1310 cue.displayState = styleBox.div;
1311 boxPositions.push(cueBox);
1312 LOG(`cue ${i}, ` + cueBox.getBoxInfoInChars());
1314 LOG(`can not find a proper position to place cue ${i}`);
1315 // Clear the display state and clear the reset flag in the cue as well,
1316 // which controls whether the task for updating the cue display is
1318 cue.displayState = null;
1319 rootOfCues.removeChild(styleBox.div);
1324 LOG(`[ERROR] unknown div computing state`);
// Public entry point of the cue processing model; see `processCuesInternal`
// for the meaning of the parameters.
WebVTT.processCues = function(window, cues, overlay, controls) {
  // When accessing `offsetXXX` attributes of element, it would trigger reflow
  // and might result in a re-entry of this function. In order to avoid doing
  // redundant computation, we would only do one processing at a time.
  if (this.isProcessingCues) {
    return;
  }
  this.isProcessingCues = true;
  try {
    processCuesInternal(window, cues, overlay, controls);
  } finally {
    // Always release the guard: if processing throws and the flag stays set,
    // every later call would return early and cues would never be updated.
    this.isProcessingCues = false;
  }
};
// Line-based WebVTT parser.
// @param window : JS window; its `VTTCue` and `VTTRegion` constructors are used
//                 to create the parsed objects.
// @param decoder : optional object with a `decode` method; defaults to a utf8
//                  TextDecoder.
// NOTE(review): the `substate` and `buffer` initialisations were missing from
// this copy and have been restored (both are read before being written by
// `parse`/`parseLine`); confirm the initial values against upstream vtt.js.
WebVTT.Parser = function(window, decoder) {
  this.window = window;
  this.state = "INITIAL";
  this.substate = "";
  this.substatebuffer = "";
  this.buffer = "";
  this.decoder = decoder || new TextDecoder("utf8");
  this.regionList = [];
  this.isPrevLineBlank = false;
};
1351 WebVTT.Parser.prototype = {
1352 // If the error is a ParsingError then report it to the consumer if
1353 // possible. If it's not a ParsingError then throw it like normal.
1354 reportOrThrowError: function(e) {
1355 if (e instanceof ParsingError) {
1356 this.onparsingerror && this.onparsingerror(e);
1361 parse: function (data) {
1362 // If there is no data then we won't decode it, but will just try to parse
1363 // whatever is in buffer already. This may occur in circumstances, for
1364 // example when flush() is called.
1366 // Try to decode the data that we received.
1367 this.buffer += this.decoder.decode(data, {stream: true});
1370 // This parser is line-based. Let's see if we have a line to parse.
1371 while (/\r\n|\n|\r/.test(this.buffer)) {
1372 let buffer = this.buffer;
1374 while (buffer[pos] !== '\r' && buffer[pos] !== '\n') {
1377 let line = buffer.substr(0, pos);
1378 // Advance the buffer early in case we fail below.
1379 if (buffer[pos] === '\r') {
1382 if (buffer[pos] === '\n') {
1385 this.buffer = buffer.substr(pos);
1387 // Spec defined replacement.
1388 line = line.replace(/[\u0000]/g, "\uFFFD");
1390 // Detect the comment. We parse line on the fly, so we only check if the
1391 // comment block is preceded by a blank line and won't check if it's
1392 // followed by another blank line.
1393 // https://www.w3.org/TR/webvtt1/#introduction-comments
1394 // TODO (1703895): according to the spec, the comment represents as a
1395 // comment block, so we need to refactor the parser in order to better
1396 // handle the comment block.
1397 if (this.isPrevLineBlank && /^NOTE($|[ \t])/.test(line)) {
1398 LOG("Ignore comment that starts with 'NOTE'");
1400 this.parseLine(line);
1402 this.isPrevLineBlank = emptyOrOnlyContainsWhiteSpaces(line);
1407 parseLine: function(line) {
1410 function createCueIfNeeded() {
1412 self.cue = new self.window.VTTCue(0, 0, "");
1416 // Parsing cue identifier and the identifier should be unique.
1417 // Return true if the input is a cue identifier.
1418 function parseCueIdentifier(input) {
1419 if (maybeIsTimeStampFormat(input)) {
1424 createCueIfNeeded();
1425 // TODO : ensure the cue identifier is unique among all cue identifiers.
1426 self.cue.id = containsTimeDirectionSymbol(input) ? "" : input;
1431 // Parsing the timestamp and cue settings.
1432 // See spec, https://w3c.github.io/webvtt/#collect-webvtt-cue-timings-and-settings
1433 function parseCueMayThrow(input) {
1435 createCueIfNeeded();
1436 parseCue(input, self.cue, self.regionList);
1437 self.state = "CUETEXT";
1439 self.reportOrThrowError(e);
1440 // In case of an error ignore rest of the cue.
1442 self.state = "BADCUE";
1446 // 3.4 WebVTT region and WebVTT region settings syntax
1447 function parseRegion(input) {
1448 let settings = new Settings();
1449 parseOptions(input, function (k, v) {
1455 settings.percent(k, v);
1458 settings.digitsValue(k, v);
1460 case "regionanchor":
1461 case "viewportanchor": {
1462 let xy = v.split(',');
1463 if (xy.length !== 2) {
1466 // We have to make sure both x and y parse, so use a temporary
1467 // settings object here.
1468 let anchor = new Settings();
1469 anchor.percent("x", xy[0]);
1470 anchor.percent("y", xy[1]);
1471 if (!anchor.has("x") || !anchor.has("y")) {
1474 settings.set(k + "X", anchor.get("x"));
1475 settings.set(k + "Y", anchor.get("y"));
1479 settings.alt(k, v, ["up"]);
1482 }, /:/, /\t|\n|\f|\r| /); // groupDelim is ASCII whitespace
1483 // https://infra.spec.whatwg.org/#ascii-whitespace, U+0009 TAB, U+000A LF, U+000C FF, U+000D CR, U+0020 SPACE
1485 // Create the region, using default values for any values that were not
1487 if (settings.has("id")) {
1489 let region = new self.window.VTTRegion();
1490 region.id = settings.get("id", "");
1491 region.width = settings.get("width", 100);
1492 region.lines = settings.get("lines", 3);
1493 region.regionAnchorX = settings.get("regionanchorX", 0);
1494 region.regionAnchorY = settings.get("regionanchorY", 100);
1495 region.viewportAnchorX = settings.get("viewportanchorX", 0);
1496 region.viewportAnchorY = settings.get("viewportanchorY", 100);
1497 region.scroll = settings.get("scroll", "");
1498 // Register the region.
1499 self.onregion && self.onregion(region);
1500 // Remember the VTTRegion for later in case we parse any VTTCues that
1502 self.regionList.push({
1503 id: settings.get("id"),
1507 dump("VTTRegion Error " + e + "\n");
1512 // Parsing the WebVTT signature, it contains parsing algo step1 to step9.
1513 // See spec, https://w3c.github.io/webvtt/#file-parsing
1514 function parseSignatureMayThrow(signature) {
1515 if (!/^WEBVTT([ \t].*)?$/.test(signature)) {
1516 throw new ParsingError(ParsingError.Errors.BadSignature);
1518 self.state = "HEADER";
1522 function parseRegionOrStyle(input) {
1523 switch (self.substate) {
1528 // TODO : not supported yet.
1532 // Parsing the region and style information.
1533 // See spec, https://w3c.github.io/webvtt/#collect-a-webvtt-block
1535 // There are sereval things would appear in header,
1536 // 1. Region or Style setting
1537 // 2. Garbage (meaningless string)
1539 // 4. Cue's timestamp
1540 // The case 4 happens when there is no line interval between the header
1541 // and the cue blocks. In this case, we should preserve the line for the
1542 // next phase parsing, returning "true".
1543 function parseHeader(line) {
1544 if (!self.substate && /^REGION|^STYLE/.test(line)) {
1545 self.substate = /^REGION/.test(line) ? "REGION" : "STYLE";
1549 if (self.substate === "REGION" || self.substate === "STYLE") {
1550 if (maybeIsTimeStampFormat(line) ||
1551 emptyOrOnlyContainsWhiteSpaces(line) ||
1552 containsTimeDirectionSymbol(line)) {
1553 parseRegionOrStyle(self.substatebuffer);
1554 self.substatebuffer = "";
1555 self.substate = null;
1557 // This is the end of the region or style state.
1558 return parseHeader(line);
1561 if (/^REGION|^STYLE/.test(line)) {
1562 // The line is another REGION/STYLE, parse and reset substatebuffer.
1563 // Don't break the while loop to parse the next REGION/STYLE.
1564 parseRegionOrStyle(self.substatebuffer);
1565 self.substatebuffer = "";
1566 self.substate = /^REGION/.test(line) ? "REGION" : "STYLE";
1570 // We weren't able to parse the line as a header. Accumulate and
1572 self.substatebuffer += " " + line;
1576 if (emptyOrOnlyContainsWhiteSpaces(line)) {
1577 // empty line, whitespaces, nothing to do.
1581 if (maybeIsTimeStampFormat(line)) {
1583 // We want to process the same line again.
1587 // string contains "-->" or an ID
1593 LOG(`state=${self.state}, line=${line}`)
1594 // 5.1 WebVTT file parsing.
1595 if (self.state === "INITIAL") {
1596 parseSignatureMayThrow(line);
1600 if (self.state === "HEADER") {
1601 // parseHeader returns false if the same line doesn't need to be
1603 if (!parseHeader(line)) {
1608 if (self.state === "ID") {
1609 // If there is no cue identifier, read the next line.
1614 // If there is no cue identifier, parse the line again.
1615 if (!parseCueIdentifier(line)) {
1616 return self.parseLine(line);
1621 if (self.state === "CUE") {
1622 parseCueMayThrow(line);
1626 if (self.state === "CUETEXT") {
1627 // Report the cue when (1) get an empty line (2) get the "-->""
1628 if (emptyOrOnlyContainsWhiteSpaces(line) ||
1629 containsTimeDirectionSymbol(line)) {
1630 // We are done parsing self cue.
1631 self.oncue && self.oncue(self.cue);
1635 if (emptyOrOnlyContainsWhiteSpaces(line)) {
1639 // Reuse the same line.
1640 return self.parseLine(line);
1642 if (self.cue.text) {
1643 self.cue.text += "\n";
1645 self.cue.text += line;
1649 if (self.state === "BADCUE") {
1650 // 54-62 - Collect and discard the remaining cue.
1652 return self.parseLine(line);
1655 self.reportOrThrowError(e);
1657 // If we are currently parsing a cue, report what we have.
1658 if (self.state === "CUETEXT" && self.cue && self.oncue) {
1659 self.oncue(self.cue);
1662 // Enter BADWEBVTT state if header was not parsed correctly otherwise
1663 // another exception occurred so enter BADCUE state.
1664 self.state = self.state === "INITIAL" ? "BADWEBVTT" : "BADCUE";
1668 flush: function () {
1671 // Finish decoding the stream.
1672 self.buffer += self.decoder.decode();
1673 self.buffer += "\n\n";
1676 self.reportOrThrowError(e);
1678 self.isPrevLineBlank = false;
1679 self.onflush && self.onflush();