2 * A :func:`rule` depends on another rule which itself depends on the first
3 * rule again, either directly or indirectly.
5 class CycleError extends Error {}
8 * An examined element was not contained in a browser ``window`` object, but
9 * something needed it to be.
11 class NoWindowError extends Error {}
13 var exceptions = /*#__PURE__*/ Object.freeze({
15 CycleError: CycleError,
16 NoWindowError: NoWindowError,
20 * Return the passed-in arg. Useful as a default.
22 function identity(x) {
26 /*eslint-env browser*/
29 * From an iterable return the best item, according to an arbitrary comparator
30 * function. In case of a tie, the first item wins.
32 * @arg by {function} Given an item of the iterable, return a value to compare
33 * @arg isBetter {function} Return whether its first arg is better than its
36 function best(iterable, by, isBetter) {
37 let bestSoFar, bestKeySoFar;
39 forEach(function (item) {
41 if (isBetter(key, bestKeySoFar) || isFirst) {
48 throw new Error('Tried to call best() on empty iterable');
54 * Return the maximum item from an iterable, as defined by >.
56 * Works with any type that works with >. If multiple items are equally great,
59 * @arg by {function} Given an item of the iterable, returns a value to
62 function max(iterable, by = identity) {
63 return best(iterable, by, (a, b) => a > b);
67 * Return an Array of maximum items from an iterable, as defined by > and ===.
69 * If an empty iterable is passed in, return [].
71 function maxes(iterable, by = identity) {
75 forEach(function (item) {
77 if (key > bestKeySoFar || isFirst) {
81 } else if (key === bestKeySoFar) {
89 * Return the minimum item from an iterable, as defined by <.
91 * If multiple items are equally great, return the first.
93 function min(iterable, by = identity) {
94 return best(iterable, by, (a, b) => a < b);
98 * Return the sum of an iterable, as defined by the + operator.
100 function sum(iterable) {
103 forEach(function assignOrAdd(addend) {
115 * Return the number of items in an iterable, consuming it as a side effect.
117 function length(iterable) {
119 // eslint-disable-next-line no-unused-vars
120 for (let item of iterable) {
127 * Iterate, depth first, over a DOM node. Return the original node first.
129 * @arg shouldTraverse {function} Given a node, say whether we should
130 * include it and its children. Default: always true.
132 function* walk(element, shouldTraverse = (element) => true) {
134 for (let child of element.childNodes) {
135 if (shouldTraverse(child)) {
136 for (let w of walk(child, shouldTraverse)) {
143 const blockTags = new Set([
182 * Return whether a DOM element is a block element by default (rather than by
185 function isBlock(element) {
186 return blockTags.has(element.tagName);
190 * Yield strings of text nodes within a normalized DOM node and its children,
191 * without venturing into any contained block elements.
193 * @arg shouldTraverse {function} Specify additional elements to exclude by
196 function* inlineTexts(element, shouldTraverse = (element) => true) {
197 // TODO: Could we just use querySelectorAll() with a really long
198 // selector rather than walk(), for speed?
199 for (let child of walk(
202 !(isBlock(element) || (element.tagName === 'SCRIPT' && element.tagName === 'STYLE')) &&
203 shouldTraverse(element)
205 if (child.nodeType === child.TEXT_NODE) {
206 // wholeText() is not implemented by jsdom, so we use
207 // textContent(). The result should be the same, since
208 // we're calling it on only text nodes, but it may be
209 // slower. On the positive side, it means we don't need to
210 // normalize the DOM tree first.
211 yield child.textContent;
217 * Return the total length of the inline text within an element, with
218 * whitespace collapsed.
220 * @arg shouldTraverse {function} Specify additional elements to exclude by
223 function inlineTextLength(element, shouldTraverse = (element) => true) {
224 return sum(map((text) => collapseWhitespace(text).length, inlineTexts(element, shouldTraverse)));
228 * Return a string with each run of whitespace collapsed to a single space.
230 function collapseWhitespace(str) {
231 return str.replace(/\s{2,}/g, ' ');
235 * Return the ratio of the inline text length of the links in an element to the
236 * inline text length of the entire element.
238 * @arg inlineLength {number} Optionally, the precalculated inline
239 * length of the fnode. If omitted, we will calculate it ourselves.
241 function linkDensity(fnode, inlineLength) {
242 if (inlineLength === undefined) {
243 inlineLength = inlineTextLength(fnode.element);
245 const lengthWithoutLinks = inlineTextLength(fnode.element, (element) => element.tagName !== 'A');
246 return (inlineLength - lengthWithoutLinks) / inlineLength;
250 * Return whether an element is a text node that consist wholly of whitespace.
252 function isWhitespace(element) {
253 return element.nodeType === element.TEXT_NODE && element.textContent.trim().length === 0;
257 * Get a key of a map, first setting it to a default value if it's missing.
259 function setDefault(map, key, defaultMaker) {
263 const defaultValue = defaultMaker();
264 map.set(key, defaultValue);
269 * Get a key of a map or, if it's missing, a default value.
271 function getDefault(map, key, defaultMaker) {
275 return defaultMaker();
279 * Return an Array, the reverse topological sort of the given nodes.
281 * @arg nodes An iterable of arbitrary things
282 * @arg nodesThatNeed {function} Take a node and returns an Array of nodes
285 function toposort(nodes, nodesThatNeed) {
287 const todo = new Set(nodes);
288 const inProgress = new Set();
290 function visit(node) {
291 if (inProgress.has(node)) {
292 throw new CycleError('The graph has a cycle.');
294 if (todo.has(node)) {
295 inProgress.add(node);
296 for (let needer of nodesThatNeed(node)) {
299 inProgress.delete(node);
305 while (todo.size > 0) {
312 * A Set with the additional methods it ought to have had
314 class NiceSet extends Set {
316 * Remove and return an arbitrary item. Throw an Error if I am empty.
319 for (let v of this.values()) {
323 throw new Error('Tried to pop from an empty NiceSet.');
327 * Union another set or other iterable into myself.
329 * @return myself, for chaining
332 for (let item of otherSet) {
339 * Subtract another set from a copy of me.
341 * @return a copy of myself excluding the elements in ``otherSet``.
344 const ret = new NiceSet(this);
345 for (const item of otherSet) {
352 * Actually show the items in me.
355 return '{' + Array.from(this).join(', ') + '}';
360 * Return the first item of an iterable.
362 function first(iterable) {
363 for (let i of iterable) {
369 * Given any node in a DOM tree, return the root element of the tree, generally
372 function rootElement(element) {
373 return element.ownerDocument.documentElement;
377 * Return the number of times a regex occurs within the string `haystack`.
379 * Caller must make sure `regex` has the 'g' option set.
381 function numberOfMatches(regex, haystack) {
382 return (haystack.match(regex) || []).length;
386 * Wrap a scoring callback, and set its element to the page root iff a score is
389 * This is used to build rulesets which classify entire pages rather than
390 * picking out specific elements.
392 * For example, these rules might classify a page as a "login page", influenced
393 * by whether they have login buttons or username fields:
395 * ``rule(type('loginPage'), score(page(pageContainsLoginButton))),``
396 * ``rule(type('loginPage'), score(page(pageContainsUsernameField)))``
398 function page(scoringFunction) {
399 function wrapper(fnode) {
400 const scoreAndTypeAndNote = scoringFunction(fnode);
401 if (scoreAndTypeAndNote.score !== undefined) {
402 scoreAndTypeAndNote.element = rootElement(fnode.element);
404 return scoreAndTypeAndNote;
410 * Sort the elements by their position in the DOM.
412 * @arg fnodes {iterable} fnodes to sort
413 * @return {Array} sorted fnodes
415 function domSort(fnodes) {
416 function compare(a, b) {
417 const element = a.element;
418 const position = element.compareDocumentPosition(b.element);
419 if (position & element.DOCUMENT_POSITION_FOLLOWING) {
421 } else if (position & element.DOCUMENT_POSITION_PRECEDING) {
427 return Array.from(fnodes).sort(compare);
430 /* istanbul ignore next */
432 * Return the DOM element contained in a passed-in fnode. Return passed-in DOM
435 * @arg fnodeOrElement {Node|Fnode}
437 function toDomElement(fnodeOrElement) {
438 return isDomElement(fnodeOrElement) ? fnodeOrElement : fnodeOrElement.element;
442 * Checks whether any of the element's attribute values satisfy some condition.
447 * score(attributesMatch(element,
448 * attr => attr.includes('good'),
449 * ['id', 'alt']) ? 2 : 1))
451 * @arg element {Node} Element whose attributes you want to search
452 * @arg predicate {function} A condition to check. Take a string and
453 * return a boolean. If an attribute has multiple values (e.g. the class
454 * attribute), attributesMatch will check each one.
455 * @arg attrs {string[]} An Array of attributes you want to search. If none are
456 * provided, search all.
457 * @return Whether any of the attribute values satisfy the predicate function
459 function attributesMatch(element, predicate, attrs = []) {
460 const attributes = attrs.length === 0 ? Array.from(element.attributes).map((a) => a.name) : attrs;
461 for (let i = 0; i < attributes.length; i++) {
462 const attr = element.getAttribute(attributes[i]);
463 // If the attribute is an array, apply the scoring function to each element
464 if (attr && ((Array.isArray(attr) && attr.some(predicate)) || predicate(attr))) {
471 /* istanbul ignore next */
473 * Yield an element and each of its ancestors.
475 function* ancestors(element) {
478 while ((parent = element.parentNode) !== null && parent.nodeType === parent.ELEMENT_NODE) {
485 * Return the sigmoid of the argument: 1 / (1 + exp(-x)). This is useful for
486 * crunching a feature value that may have a wide range into the range (0, 1)
487 * without a hard ceiling: the sigmoid of even a very large number will be a
488 * little larger than that of a slightly smaller one.
490 * @arg x {Number} a number to be compressed into the range (0, 1)
492 function sigmoid(x) {
493 return 1 / (1 + Math.exp(-x));
496 /* istanbul ignore next */
498 * Return whether an element is practically visible, considering things like 0
499 * size or opacity, ``visibility: hidden`` and ``overflow: hidden``.
501 * Merely being scrolled off the page in either horizontally or vertically
502 * doesn't count as invisible; the result of this function is meant to be
503 * independent of viewport size.
505 * @throws {NoWindowError} The element (or perhaps one of its ancestors) is not
506 * in a window, so we can't find the `getComputedStyle()` routine to call.
507 * That routine is the source of most of the information we use, so you
508 * should pick a different strategy for non-window contexts.
510 function isVisible(fnodeOrElement) {
511 // This could be 5x more efficient if https://github.com/w3c/csswg-drafts/issues/4122 happens.
512 const element = toDomElement(fnodeOrElement);
513 const elementWindow = windowForElement(element);
514 const elementRect = element.getBoundingClientRect();
515 const elementStyle = elementWindow.getComputedStyle(element);
516 // Alternative to reading ``display: none`` due to Bug 1381071.
517 if (elementRect.width === 0 && elementRect.height === 0 && elementStyle.overflow !== 'hidden') {
520 if (elementStyle.visibility === 'hidden') {
523 // Check if the element is irrevocably off-screen:
524 if (elementRect.x + elementRect.width < 0 || elementRect.y + elementRect.height < 0) {
527 for (const ancestor of ancestors(element)) {
528 const isElement = ancestor === element;
529 const style = isElement ? elementStyle : elementWindow.getComputedStyle(ancestor);
530 if (style.opacity === '0') {
533 if (style.display === 'contents') {
534 // ``display: contents`` elements have no box themselves, but children are
538 const rect = isElement ? elementRect : ancestor.getBoundingClientRect();
539 if ((rect.width === 0 || rect.height === 0) && elementStyle.overflow === 'hidden') {
540 // Zero-sized ancestors don’t make descendants hidden unless the descendant
541 // has ``overflow: hidden``.
549 * Return the extracted [r, g, b, a] values from a string like "rgba(0, 5, 255, 0.8)",
550 * and scale them to 0..1. If no alpha is specified, return undefined for it.
552 function rgbaFromString(str) {
553 const m = str.match(/^rgba?\s*\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*(?:,\s*(\d+(?:\.\d+)?)\s*)?\)$/i);
555 return [m[1] / 255, m[2] / 255, m[3] / 255, m[4] === undefined ? undefined : parseFloat(m[4])];
557 throw new Error('Color ' + str + ' did not match pattern rgb[a](r, g, b[, a]).');
562 * Return the saturation 0..1 of a color defined by RGB values 0..1.
564 function saturation(r, g, b) {
565 const cMax = Math.max(r, g, b);
566 const cMin = Math.min(r, g, b);
567 const delta = cMax - cMin;
568 const lightness = (cMax + cMin) / 2;
569 const denom = 1 - Math.abs(2 * lightness - 1);
570 // Return 0 if it's black (R, G, and B all 0).
571 return denom === 0 ? 0 : delta / denom;
575 * Scale a number to the range [0, 1] using a linear slope.
577 * For a rising line, the result is 0 until the input reaches zeroAt, then
578 * increases linearly until oneAt, at which it becomes 1. To make a falling
579 * line, where the result is 1 to the left and 0 to the right, use a zeroAt
580 * greater than oneAt.
582 function linearScale(number, zeroAt, oneAt) {
583 const isRising = zeroAt < oneAt;
585 if (number <= zeroAt) {
587 } else if (number >= oneAt) {
591 if (number >= zeroAt) {
593 } else if (number <= oneAt) {
597 const slope = 1 / (oneAt - zeroAt);
598 return slope * (number - zeroAt);
601 // -------- Routines below this point are private to the framework. --------
604 * Flatten out an iterable of iterables into a single iterable of non-
605 * iterables. Does not consider strings to be iterable.
607 function* flatten(iterable) {
608 for (const i of iterable) {
609 if (typeof i !== 'string' && !(i instanceof Element) && isIterable(i)) {
618 * A lazy, top-level ``Array.map()`` workalike that works on anything iterable
620 function* map(fn, iterable) {
621 for (const i of iterable) {
627 * A lazy, top-level ``Array.forEach()`` workalike that works on anything
630 function forEach(fn, iterable) {
631 for (const i of iterable) {
636 /* istanbul ignore next */
638 * @return whether a thing appears to be a DOM element.
640 function isDomElement(thing) {
641 return thing.nodeName !== undefined;
644 function isIterable(thing) {
645 return thing && typeof thing[Symbol.iterator] === 'function';
649 * Return an backward iterator over an Array without reversing it in place.
651 function* reversed(array) {
652 for (let i = array.length - 1; i >= 0; i--) {
657 /* istanbul ignore next */
659 * Return the window an element is in.
661 * @throws {NoWindowError} There isn't such a window.
663 function windowForElement(element) {
664 let doc = element.ownerDocument;
666 // The element itself was a document.
669 const win = doc.defaultView;
671 throw new NoWindowError();
676 var utilsForFrontend = /*#__PURE__*/ Object.freeze({
679 ancestors: ancestors,
680 attributesMatch: attributesMatch,
682 collapseWhitespace: collapseWhitespace,
687 getDefault: getDefault,
689 inlineTextLength: inlineTextLength,
690 inlineTexts: inlineTexts,
692 isDomElement: isDomElement,
693 isVisible: isVisible,
694 isWhitespace: isWhitespace,
696 linearScale: linearScale,
697 linkDensity: linkDensity,
702 numberOfMatches: numberOfMatches,
705 rgbaFromString: rgbaFromString,
706 rootElement: rootElement,
707 saturation: saturation,
708 setDefault: setDefault,
711 toDomElement: toDomElement,
714 windowForElement: windowForElement,
718 * Return the number of stride nodes between 2 DOM nodes *at the same
719 * level of the tree*, without going up or down the tree.
721 * ``left`` xor ``right`` may also be undefined.
723 function numStrides(left, right) {
726 // Walk right from left node until we hit the right node or run out:
728 let shouldContinue = sibling && sibling !== right;
729 while (shouldContinue) {
730 sibling = sibling.nextSibling;
731 if ((shouldContinue = sibling && sibling !== right) && !isWhitespace(sibling)) {
735 if (sibling !== right) {
736 // Don't double-punish if left and right are siblings.
737 // Walk left from right node:
740 sibling = sibling.previousSibling;
741 if (sibling && !isWhitespace(sibling)) {
750 * Return a topological distance between 2 DOM nodes or :term:`fnodes<fnode>`
751 * weighted according to the similarity of their ancestry in the DOM. For
752 * instance, if one node is situated inside ``<div><span><b><theNode>`` and the
753 * other node is at ``<differentDiv><span><b><otherNode>``, they are considered
754 * close to each other for clustering purposes. This is useful for picking out
755 * nodes which have similar purposes.
757 * Return ``Number.MAX_VALUE`` if one of the nodes contains the other.
759 * This is largely an implementation detail of :func:`clusters`, but you can
760 * call it yourself if you wish to implement your own clustering. Takes O(n log
763 * Note that the default costs may change; pass them in explicitly if they are
766 * @arg fnodeA {Node|Fnode}
767 * @arg fnodeB {Node|Fnode}
768 * @arg differentDepthCost {number} Cost for each level deeper one node is than
769 * the other below their common ancestor
770 * @arg differentTagCost {number} Cost for a level below the common ancestor
771 * where tagNames differ
772 * @arg sameTagCost {number} Cost for a level below the common ancestor where
773 * tagNames are the same
774 * @arg strideCost {number} Cost for each stride node between A and B. Stride
775 * nodes are siblings or siblings-of-ancestors that lie between the 2
776 * nodes. These interposed nodes make it less likely that the 2 nodes
777 * should be together in a cluster.
778 * @arg additionalCost {function} Return an additional cost, given 2 fnodes or
786 differentDepthCost = 2,
787 differentTagCost = 2,
790 additionalCost = (fnodeA, fnodeB) => 0,
793 // I was thinking of something that adds little cost for siblings. Up
794 // should probably be more expensive than down (see middle example in the
797 // TODO: Test and tune default costs. They're off the cuff at the moment.
799 if (fnodeA === fnodeB) {
803 const elementA = isDomElement(fnodeA) ? fnodeA : fnodeA.element;
804 const elementB = isDomElement(fnodeB) ? fnodeB : fnodeB.element;
806 // Stacks that go from the common ancestor all the way to A and B:
807 const aAncestors = [elementA];
808 const bAncestors = [elementB];
810 let aAncestor = elementA;
811 let bAncestor = elementB;
813 // Ascend to common parent, stacking them up for later reference:
814 while (!aAncestor.contains(elementB)) {
815 // Note: an element does contain() itself.
816 aAncestor = aAncestor.parentNode;
817 aAncestors.push(aAncestor); //aAncestors = [a, b]. aAncestor = b // if a is outer: no loop here; aAncestors = [a]. aAncestor = a.
820 // In compareDocumentPosition()'s opinion, inside implies after. Basically,
821 // before and after pertain to opening tags.
822 const comparison = elementA.compareDocumentPosition(elementB);
824 // If either contains the other, abort. We'd either return a misleading
825 // number or else walk upward right out of the document while trying to
826 // make the ancestor stack.
827 if (comparison & (elementA.DOCUMENT_POSITION_CONTAINS | elementA.DOCUMENT_POSITION_CONTAINED_BY)) {
828 return Number.MAX_VALUE;
830 // Make an ancestor stack for the right node too so we can walk
831 // efficiently down to it:
833 bAncestor = bAncestor.parentNode; // Assumes we've early-returned above if A === B. This walks upward from the outer node and up out of the tree. It STARTS OUT with aAncestor === bAncestor!
834 bAncestors.push(bAncestor);
835 } while (bAncestor !== aAncestor);
837 // Figure out which node is left and which is right, so we can follow
838 // sibling links in the appropriate directions when looking for stride
840 let left = aAncestors;
841 let right = bAncestors;
843 if (comparison & elementA.DOCUMENT_POSITION_FOLLOWING) {
844 // A is before, so it could contain the other node. What did I mean to do if one contained the other?
847 } else if (comparison & elementA.DOCUMENT_POSITION_PRECEDING) {
848 // A is after, so it might be contained by the other node.
853 // Descend to both nodes in parallel, discounting the traversal
854 // cost iff the nodes we hit look similar, implying the nodes dwell
855 // within similar structures.
856 while (left.length || right.length) {
857 const l = left.pop();
858 const r = right.pop();
859 if (l === undefined || r === undefined) {
860 // Punishment for being at different depths: same as ordinary
861 // dissimilarity punishment for now
862 cost += differentDepthCost;
864 // TODO: Consider similarity of classList.
865 cost += l.tagName === r.tagName ? sameTagCost : differentTagCost;
867 // Optimization: strides might be a good dimension to eliminate.
868 if (strideCost !== 0) {
869 cost += numStrides(l, r) * strideCost;
873 return cost + additionalCost(fnodeA, fnodeB);
877 * Return the spatial distance between 2 fnodes or elements, assuming a
880 * Specifically, return the distance in pixels between the centers of
881 * ``fnodeA.element.getBoundingClientRect()`` and
882 * ``fnodeB.element.getBoundingClientRect()``.
884 function euclidean(fnodeA, fnodeB) {
886 * Return the horizontal distance from the left edge of the viewport to the
887 * center of an element, given a DOMRect object for it. It doesn't matter
888 * that the distance is affected by the page's scroll offset, since the 2
889 * elements have the same offset.
891 function xCenter(domRect) {
892 return domRect.left + domRect.width / 2;
894 function yCenter(domRect) {
895 return domRect.top + domRect.height / 2;
898 const elementA = toDomElement(fnodeA);
899 const elementB = toDomElement(fnodeB);
900 const aRect = elementA.getBoundingClientRect();
901 const bRect = elementB.getBoundingClientRect();
902 return Math.sqrt((xCenter(aRect) - xCenter(bRect)) ** 2 + (yCenter(aRect) - yCenter(bRect)) ** 2);
905 /** A lower-triangular matrix of inter-cluster distances */
906 class DistanceMatrix {
908 * @arg distance {function} Some notion of distance between 2 given nodes
910 constructor(elements, distance) {
911 // A sparse adjacency matrix:
914 // C => {A => 4, B => 4},
915 // D => {A => 4, B => 4, C => 4}
916 // E => {A => 4, B => 4, C => 4, D => 4}}
918 // A, B, etc. are arrays of [arrays of arrays of...] nodes, each
919 // array being a cluster. In this way, they not only accumulate a
920 // cluster but retain the steps along the way.
922 // This is an efficient data structure in terms of CPU and memory, in
923 // that we don't have to slide a lot of memory around when we delete a
924 // row or column from the middle of the matrix while merging. Of
925 // course, we lose some practical efficiency by using hash tables, and
926 // maps in particular are slow in their early implementations.
927 this._matrix = new Map();
929 // Convert elements to clusters:
930 const clusters = elements.map((el) => [el]);
933 for (let outerCluster of clusters) {
934 const innerMap = new Map();
935 for (let innerCluster of this._matrix.keys()) {
936 innerMap.set(innerCluster, distance(outerCluster[0], innerCluster[0]));
938 this._matrix.set(outerCluster, innerMap);
940 this._numClusters = clusters.length;
943 // Return (distance, a: clusterA, b: clusterB) of closest-together clusters.
944 // Replace this to change linkage criterion.
948 if (this._numClusters < 2) {
949 throw new Error('There must be at least 2 clusters in order to return the closest() ones.');
952 // Return the distances between every pair of clusters.
953 function clustersAndDistances() {
955 for (let [outerKey, row] of self._matrix.entries()) {
956 for (let [innerKey, storedDistance] of row.entries()) {
957 ret.push({ a: outerKey, b: innerKey, distance: storedDistance });
962 // Optimizing this by inlining the loop and writing it less
963 // functionally doesn't help:
964 return min(clustersAndDistances(), (x) => x.distance);
967 // Look up the distance between 2 clusters in me. Try the lookup in the
968 // other direction if the first one falls in the nonexistent half of the
970 _cachedDistance(clusterA, clusterB) {
971 let ret = this._matrix.get(clusterA).get(clusterB);
972 if (ret === undefined) {
973 ret = this._matrix.get(clusterB).get(clusterA);
978 // Merge two clusters.
979 merge(clusterA, clusterB) {
980 // An example showing how rows merge:
984 // D: {A: 4, B: 4, C: 4}
985 // E: {A: 4, B: 4, C: 2, D: 4}}
991 // AB: {C: 4, D: 4, E: 4}
998 // Construct new row, finding min distances from either subcluster of
999 // the new cluster to old clusters.
1001 // There will be no repetition in the matrix because, after all,
1002 // nothing pointed to this new cluster before it existed.
1003 const newRow = new Map();
1004 for (let outerKey of this._matrix.keys()) {
1005 if (outerKey !== clusterA && outerKey !== clusterB) {
1008 Math.min(this._cachedDistance(clusterA, outerKey), this._cachedDistance(clusterB, outerKey))
1013 // Delete the rows of the clusters we're merging.
1014 this._matrix.delete(clusterA);
1015 this._matrix.delete(clusterB);
1017 // Remove inner refs to the clusters we're merging.
1018 for (let inner of this._matrix.values()) {
1019 inner.delete(clusterA);
1020 inner.delete(clusterB);
1024 this._matrix.set([clusterA, clusterB], newRow);
1026 // There is a net decrease of 1 cluster:
1027 this._numClusters -= 1;
1031 return this._numClusters;
1034 // Return an Array of nodes for each cluster in me.
1036 // TODO: Can't get map to work here. Don't know why.
1037 return Array.from(this._matrix.keys()).map((e) => Array.from(flatten(e)));
1042 * Partition the given nodes into one or more clusters by position in the DOM
1045 * This implements an agglomerative clustering. It uses single linkage, since
1046 * we're talking about adjacency here more than Euclidean proximity: the
1047 * clusters we're talking about in the DOM will tend to be adjacent, not
1048 * overlapping. We haven't tried other linkage criteria yet.
1050 * In a later release, we may consider score or notes.
1052 * @arg {Fnode[]|Node[]} fnodes :term:`fnodes<fnode>` or DOM nodes to group
1054 * @arg {number} splittingDistance The closest-nodes :func:`distance` beyond
1055 * which we will not attempt to unify 2 clusters. Make this larger to make
1057 * @arg getDistance {function} A function that returns some notion of numerical
1058 * distance between 2 nodes. Default: :func:`distance`
1059 * @return {Array} An Array of Arrays, with each Array containing all the
1060 * nodes in one cluster. Note that neither the clusters nor the nodes are
1061 * in any particular order. You may find :func:`domSort` helpful to remedy
1064 function clusters(fnodes, splittingDistance, getDistance = distance) {
1065 const matrix = new DistanceMatrix(fnodes, getDistance);
1068 while (matrix.numClusters() > 1 && (closest = matrix.closest()).distance < splittingDistance) {
1069 matrix.merge(closest.a, closest.b);
1072 return matrix.clusters();
1075 var clusters$1 = /*#__PURE__*/ Object.freeze({
1079 euclidean: euclidean,
1082 // The left-hand side of a rule
1085 * Take nodes that match a given DOM selector. Example:
1086 * ``dom('meta[property="og:title"]')``
1088 * Every ruleset has at least one ``dom`` or :func:`element` rule, as that is
1089 * where nodes begin to flow into the system. If run against a subtree of a
1090 * document, the root of the subtree is not considered as a possible match.
1092 function dom(selector) {
1093 return new DomLhs(selector);
1097 * Take a single given node if it matches a given DOM selector, without looking
1098 * through its descendents or ancestors. Otherwise, take no nodes. Example:
1099 * ``element('input')``
1101 * This is useful for applications in which you want Fathom to classify an
1102 * element the user has selected, rather than scanning the whole page for
1105 function element(selector) {
1106 return new ElementLhs(selector);
1111 * Allows running a custom function when selecting
1112 * DOM elements in a fathom LHS rule.
1114 function domQuery(query) {
1115 return new DomQueryLhs(query);
1119 * Rules and the LHSs and RHSs that comprise them have no mutable state. This
1120 * lets us make BoundRulesets from Rulesets without duplicating the rules. It
1121 * also lets us share a common cache among rules: multiple ones might care
1122 * about a cached type(), for instance; there isn't a one-to-one relationship
1123 * of storing with caring. There would also, because of the interdependencies
1124 * of rules in a ruleset, be little use in segmenting the caches: if you do
1125 * something that causes one to need to be cleared, you'll need to clear many
1128 * Lhses are responsible for maintaining ruleset.maxCache.
1130 * Lhs and its subclasses are private to the Fathom framework.
1134 this._predicate = () => true;
1137 /** Return a new Lhs of the appropriate kind, given its first call. */
1138 static fromFirstCall(firstCall) {
1139 // firstCall is never 'dom', because dom() directly returns a DomLhs.
1140 if (firstCall.method === 'type') {
1141 return new TypeLhs(...firstCall.args);
1142 } else if (firstCall.method === 'and') {
1143 return new AndLhs(firstCall.args);
1144 } else if (firstCall.method === 'nearest') {
1145 return new NearestLhs(firstCall.args);
1147 throw new Error('The left-hand side of a rule() must start with dom(), type(), and(), or nearest().');
1152 * Prune nodes from consideration early in run execution, before scoring is
1155 * Reserve this for where you are sure it is always correct or when
1156 * performance demands it. It is generally preferable to use :func:`score`
1157 * and let the :doc:`trainer<training>` determine the relative significance
1158 * of each rule. Human intuition as to what is important is often wrong:
1159 * for example, one might assume that a music player website would include
1160 * the word "play", but this does not hold once you include sites in other
1163 * Can be chained after :func:`type` or :func:`dom`.
1165 * Example: ``dom('p').when(isVisible)``
1167 * @arg {function} predicate Accepts a fnode and returns a boolean
1170 let lhs = this.clone();
1171 lhs._predicate = predicate;
1176 * Of all the dom nodes selected by type() or dom(), return only
1177 * the fnodes that satisfy all the predicates imposed by calls to
1180 fnodesSatisfyingWhen(fnodes) {
1181 return Array.from(fnodes).filter(this._predicate);
1185 * Return an iterable of output fnodes selected by this left-hand-side
1188 * Pre: The rules I depend on have already been run, and their results are
1189 * in ruleset.typeCache.
1191 * @arg ruleset {BoundRuleset}
1193 // fnodes (ruleset) {}
1196 * Check that a RHS-emitted fact is legal for this kind of LHS, and throw
1197 * an error if it isn't.
1202 * Return the single type the output of the LHS is guaranteed to have.
1203 * Return undefined if there is no such single type we can ascertain.
1208 * Return the type I aggregate if I am an aggregate LHS; return undefined
1214 * Return each combination of types my selected nodes could be locally (that
1215 * is, by this rule only) constrained to have.
1217 * For example, type(A) would return [A]. and(A, or(B, C)) would return
1218 * [AB, AC, ABC]. More examples:
1220 * or(A, B) → typeIn(A, B, C) # Finalizes A, B. combos A, B, AB: finalizes AB. Optimization: there's no point in returning the last combo in ors. Compilation into 2 rules with identical RHSs will inherently implement this optimization.
1221 * or(A, B) → typeIn(A, B) # Finalizes A, B
1222 * or(A, B) → A # Finalizes B
1223 * and(A) -> A # Finalizes nothing
1224 * and(A, B) -> A # Finalizes nothing. AB: Ø
1225 * and(A) -> typeIn(A, B) # Finalizes A. A
1226 * and(A, B) -> typeIn(A, B) # Finalizes nothing. AB
1227 * and(A, B) -> typeIn(A, B, C) # Finalizes A, B. AB
1228 * and(A, or(B, C)) -> D # Finalizes A, B, C. AB, AC, ABC: ABC
1229 * and(A, or(B, C)) -> B # Finalizes A, C. AB, AC, ABC: AC
1230 * type(A).not(and(A, B)) ->
1232 * @return {NiceSet[]}
1234 // possibleTypeCombinations() {}
1237 * Types mentioned in this LHS.
1239 * In other words, the types I need to know the assignment status of before
1240 * I can make my selections
1242 * @return NiceSet of strings
1244 // typesMentioned() {}
1247 class DomLhs extends Lhs {
1248 constructor(selector) {
1250 if (selector === undefined) {
1252 'A querySelector()-style selector is required as the argument to ' + this._callName() + '().'
1255 this.selector = selector;
1259 * Return the name of this kind of LHS, for use in error messages.
1266 return new this.constructor(this.selector);
1270 return this._domNodesToFilteredFnodes(ruleset, ruleset.doc.querySelectorAll(this.selector));
1274 * Turn a NodeList of DOM nodes into an array of fnodes, and filter out
1275 * those that don't match the :func:`when()` clause.
1277 _domNodesToFilteredFnodes(ruleset, domNodes) {
1279 for (let i = 0; i < domNodes.length; i++) {
1280 ret.push(ruleset.fnodeForElement(domNodes[i]));
1282 return this.fnodesSatisfyingWhen(ret);
1286 if (fact.type === undefined) {
1288 `The right-hand side of a ${this._callName()}() rule failed to specify a type. This means there is no way for its output to be used by later rules. All it specified was ${fact}.`
1297 possibleTypeCombinations() {
1302 return new NiceSet();
1306 class ElementLhs extends DomLhs {
1312 return this._domNodesToFilteredFnodes(ruleset, ruleset.doc.matches(this.selector) ? [ruleset.doc] : []);
1318 * Extends DomLhs rule but instead of passing
1319 * a single string selector, allows passing a
1320 * custom selector function that will be run
1321 * against the document
1323 class DomQueryLhs extends DomLhs {
1324 constructor(query) {
1326 * mock selector to avoid parent DomLhs
1327 * throwing in constructor call
1338 return new DomQueryLhs(this.query);
1342 return this._domNodesToFilteredFnodes(ruleset, this.query(ruleset.doc));
1346 /** Internal representation of a LHS constrained by type but not by max() */
1347 class TypeLhs extends Lhs {
1350 if (type === undefined) {
1351 throw new Error('A type name is required when calling type().');
1353 this._type = type; // the input type
1357 return new this.constructor(this._type);
1361 const cached = getDefault(ruleset.typeCache, this._type, () => []);
1362 return this.fnodesSatisfyingWhen(cached);
1365 /** Override the type previously specified by this constraint. */
1367 // Preserve the class in case this is a TypeMaxLhs.
1368 return new this.constructor(inputType);
1372 * Of the nodes selected by a ``type`` call to the left, constrain the LHS
1373 * to return only the max-scoring one. If there is a tie, more than 1 node
1374 * will be returned. Example: ``type('titley').max()``
1377 return new TypeMaxLhs(this._type);
1381 * Take the nodes selected by a ``type`` call to the left, group them into
1382 * clusters, and return the nodes in the cluster that has the highest total
1383 * score (on the relevant type).
1385 * Nodes come out in arbitrary order, so, if you plan to emit them,
1386 * consider using ``.out('whatever').allThrough(domSort)``. See
1389 * If multiple clusters have equally high scores, return an arbitrary one,
1390 * because Fathom has no way to represent arrays of arrays in rulesets.
1392 * @arg options {Object} The same depth costs taken by :func:`distance`,
1393 * plus ``splittingDistance``, which is the distance beyond which 2
1394 * clusters will be considered separate. ``splittingDistance``, if
1395 * omitted, defaults to 3.
1397 bestCluster(options) {
1398 return new BestClusterLhs(this._type, options);
1401 // Other clustering calls could be called biggestCluster() (having the most
1402 // nodes) and bestAverageCluster() (having the highest average score).
1408 possibleTypeCombinations() {
1409 return [this.typesMentioned()];
1413 return new NiceSet([this._type]);
1418 * Abstract LHS that is an aggregate function taken across all fnodes of a type
1420 * The main point here is that any aggregate function over a (typed) set of
1421 * nodes depends on first computing all the rules that could emit those nodes
1422 * (nodes of that type).
1424 class AggregateTypeLhs extends TypeLhs {
1431 * Internal representation of a LHS that has both type and max([NUMBER])
1432 * constraints. max(NUMBER != 1) support is not yet implemented.
1434 class TypeMaxLhs extends AggregateTypeLhs {
1436 * Return the max-scoring node (or nodes if there is a tie) of the given
1440 // TODO: Optimize better. Walk the dependency tree, and run only the
1441 // rules that could possibly lead to a max result. As part of this,
1442 // make RHSs expose their max potential scores.
1444 // Work around V8 bug:
1445 // https://stackoverflow.com/questions/32943776/using-super-within-an-
1446 // arrow-function-within-an-arrow-function-within-a-method
1447 const getSuperFnodes = () => super.fnodes(ruleset);
1448 return setDefault(ruleset.maxCache, this._type, function maxFnodesOfType() {
1449 return maxes(getSuperFnodes(), (fnode) => ruleset.weightedScore(fnode.scoresSoFarFor(self._type)));
1454 class BestClusterLhs extends AggregateTypeLhs {
1455 constructor(type, options) {
1457 this._options = options || { splittingDistance: 3 };
1461 * Group the nodes of my type into clusters, and return the cluster with
1462 * the highest total score for that type.
1465 // Get the nodes of the type:
1466 const fnodesOfType = Array.from(super.fnodes(ruleset));
1467 if (fnodesOfType.length === 0) {
1471 const clusts = clusters(fnodesOfType, this._options.splittingDistance, (a, b) => distance(a, b, this._options));
1472 // Tag each cluster with the total of its nodes' scores:
1473 const clustsAndSums = clusts.map((clust) => [clust, sum(clust.map((fnode) => fnode.scoreFor(this._type)))]);
1474 // Return the highest-scoring cluster:
1475 return max(clustsAndSums, (clustAndSum) => clustAndSum[1])[0];
1479 class AndLhs extends Lhs {
1483 // For the moment, we accept only type()s as args. TODO: Generalize to
1484 // type().max() and such later.
1485 this._args = lhss.map(sideToTypeLhs);
1489 // Take an arbitrary one for starters. Optimization: we could always
1490 // choose the pickiest one to start with.
1491 const fnodes = this._args[0].fnodes(ruleset);
1492 // Then keep only the fnodes that have the type of every other arg:
1493 fnodeLoop: for (let fnode of fnodes) {
1494 for (let otherLhs of this._args.slice(1)) {
1495 // Optimization: could use a .hasTypeSoFar() below
1496 if (!fnode.hasType(otherLhs.guaranteedType())) {
1497 // TODO: This is n^2. Why is there no set intersection in JS?!
1505 possibleTypeCombinations() {
1506 return [this.typesMentioned()];
1510 return new NiceSet(this._args.map((arg) => arg.guaranteedType()));
1514 function sideToTypeLhs(side) {
1515 const lhs = side.asLhs();
1516 if (!(lhs.constructor === TypeLhs)) {
1517 throw new Error('and() and nearest() support only simple type() calls as arguments for now.');
1518 // TODO: Though we could solve this with a compilation step: and(type(A), type(B).max()) is equivalent to type(B).max() -> type(Bmax); and(type(A), type(Bmax)).
1519 // In fact, we should be able to compile most (any?) arbitrary and()s, including nested ands and and(type(...).max(), ...) constructions into several and(type(A), type(B), ...) rules.
1524 class NearestLhs extends Lhs {
1525 constructor([a, b, distance]) {
1527 this._a = sideToTypeLhs(a);
1528 this._b = sideToTypeLhs(b);
1529 this._distance = distance;
1533 * Return an iterable of {fnodes, transformer} pairs.
1536 // Go through all the left arg's nodes. For each one, find the closest
1537 // right-arg's node. O(a * b). Once a right-arg's node is used, we
1538 // don't eliminate it from consideration, because then order of left-
1539 // args' nodes would matter.
1541 // TODO: Still not sure how to get the distance to factor into the
1542 // score unless we hard-code nearest() to do that. It's a
1543 // matter of not being able to bind on the RHS to the output of the
1544 // distance function on the LHS. Perhaps we could at least make
1545 // distance part of the note and read it in a props() callback.
1547 // We're assuming here that simple type() calls return just plain
1548 // fnodes, not {fnode, rhsTransformer} pairs:
1549 const as_ = this._a.fnodes(ruleset);
1550 const bs = Array.from(this._b.fnodes(ruleset));
1551 if (bs.length > 0) {
1552 // If bs is empty, there can be no nearest nodes, so don't emit any.
1553 for (const a of as_) {
1554 const nearest = min(bs, (b) => this._distance(a, b));
1557 rhsTransformer: function setNoteIfEmpty(fact) {
1558 // If note is explicitly set by the RHS, let it take
1559 // precedence, even though that makes this entire LHS
1561 if (fact.note === undefined) {
1562 fact.note = nearest; // TODO: Wrap this in an object to make room to return distance later.
1572 // Barf if the fact doesn't set a type at least. It should be a *new* type or at least one that doesn't result in cycles, but we can't deduce that.
1575 possibleTypeCombinations() {
1576 return [new NiceSet([this._a.guaranteedType()])];
1580 return new NiceSet([this._a.guaranteedType(), this._b.guaranteedType()]);
1584 return this._a.guaranteedType();
1588 // The right-hand side of a rule
1602 * Expose the output of this rule's LHS as a "final result" to the surrounding
1603 * program. It will be available by calling :func:`~BoundRuleset.get` on the
1604 * ruleset and passing the key. You can run each node through a callback
1605 * function first by adding :func:`through()`, or you can run the entire set of
1606 * nodes through a callback function by adding :func:`allThrough()`.
1609 return new OutwardRhs(key);
1613 constructor(calls = [], max = Infinity, types) {
1614 this._calls = calls.slice();
1615 this._max = max; // max score
1616 this._types = new NiceSet(types); // empty set if unconstrained
1620 * Declare that the maximum returned subscore is such and such,
1621 * which helps the optimizer plan efficiently. This doesn't force it to be
1622 * true; it merely throws an error at runtime if it isn't. To lift an
1623 * ``atMost`` constraint, call ``atMost()`` (with no args). The reason
1624 * ``atMost`` and ``typeIn`` apply until explicitly cleared is so that, if
1625 * someone used them for safety reasons on a lexically distant rule you are
1626 * extending, you won't stomp on their constraint and break their
1627 * invariants accidentally.
1630 return new this.constructor(this._calls, score, this._types);
1633 _checkAtMost(fact) {
1634 if (fact.score !== undefined && fact.score > this._max) {
1635 throw new Error(`Score of ${fact.score} exceeds the declared atMost(${this._max}).`);
1640 * Determine any of type, note, score, and element using a callback. This
1641 * overrides any previous call to `props` and, depending on what
1642 * properties of the callback's return value are filled out, may override
1643 * the effects of other previous calls as well.
1645 * The callback should return...
1647 * * An optional :term:`subscore`
1648 * * A type (required on ``dom(...)`` rules, defaulting to the input one on
1649 * ``type(...)`` rules)
1651 * * An element, defaulting to the input one. Overriding the default
1652 * enables a callback to walk around the tree and say things about nodes
1653 * other than the input one.
1656 function getSubfacts(fnode) {
1657 const subfacts = callback(fnode);
1658 // Filter the raw result down to okayed properties so callbacks
1659 // can't insert arbitrary keys (like conserveScore, which might
1660 // mess up the optimizer).
1661 for (let subfact in subfacts) {
1662 if (!SUBFACTS.hasOwnProperty(subfact) || !(SUBFACTS[subfact] & getSubfacts.possibleSubfacts)) {
1663 // The ES5.1 spec says in 12.6.4 that it's fine to delete
1665 delete subfacts[subfact];
1670 // Thse are the subfacts this call could affect:
1671 getSubfacts.possibleSubfacts = TYPE | NOTE | SCORE | ELEMENT;
1672 getSubfacts.kind = 'props';
1673 return new this.constructor(this._calls.concat(getSubfacts), this._max, this._types);
1677 * Set the type applied to fnodes processed by this RHS.
1680 // In the future, we might also support providing a callback that receives
1681 // the fnode and returns a type. We couldn't reason based on these, but the
1682 // use would be rather a consise way to to override part of what a previous
1683 // .props() call provides.
1685 // Actually emit a given type.
1686 function getSubfacts() {
1687 return { type: theType };
1689 getSubfacts.possibleSubfacts = TYPE;
1690 getSubfacts.type = theType;
1691 getSubfacts.kind = 'type';
1692 return new this.constructor(this._calls.concat(getSubfacts), this._max, this._types);
1696 * Constrain this rule to emit 1 of a set of given types. Pass no args to lift
1697 * a previous ``typeIn`` constraint, as you might do when basing a LHS on a
1698 * common value to factor out repetition.
1700 * ``typeIn`` is mostly a hint for the query planner when you're emitting types
1701 * dynamically from ``props`` calls—in fact, an error will be raised if
1702 * ``props`` is used without a ``typeIn`` or ``type`` to constrain it—but it
1703 * also checks conformance at runtime to ensure validity.
1706 // Rationale: If we used the spelling "type('a', 'b', ...)" instead of
1707 // this, one might expect type('a', 'b').type(fn) to have the latter
1708 // call override, while expecting type(fn).type('a', 'b') to keep both
1709 // in effect. Then different calls to type() don't consistently
1710 // override each other, and the rules get complicated. Plus you can't
1711 // inherit a type constraint and then sub in another type-returning
1712 // function that still gets the constraint applied.
1713 return new this.constructor(this._calls, this._max, types);
1717 * Check a fact for conformance with any typeIn() call.
1719 * @arg leftType the type of the LHS, which becomes my emitted type if the
1720 * fact doesn't specify one
1722 _checkTypeIn(result, leftType) {
1723 if (this._types.size > 0) {
1724 if (result.type === undefined) {
1725 if (!this._types.has(leftType)) {
1727 `A right-hand side claimed, via typeIn(...) to emit one of the types ${this._types} but actually inherited ${leftType} from the left-hand side.`
1730 } else if (!this._types.has(result.type)) {
1732 `A right-hand side claimed, via typeIn(...) to emit one of the types ${this._types} but actually emitted ${result.type}.`
1739 * Whatever the callback returns (even ``undefined``) becomes the note of
1740 * the fact. This overrides any previous call to ``note``.
1743 function getSubfacts(fnode) {
1744 return { note: callback(fnode) };
1746 getSubfacts.possibleSubfacts = NOTE;
1747 getSubfacts.kind = 'note';
1748 return new this.constructor(this._calls.concat(getSubfacts), this._max, this._types);
1752 * Affect the confidence with which the input node should be considered a
1755 * The parameter is generally between 0 and 1 (inclusive), with 0 meaning
1756 * the node does not have the "smell" this rule checks for and 1 meaning it
1757 * does. The range between 0 and 1 is available to represent "fuzzy"
1758 * confidences. If you have an unbounded range to compress down to [0, 1],
1759 * consider using :func:`sigmoid` or a scaling thereof.
1761 * Since every node can have multiple, independent scores (one for each
1762 * type), this applies to the type explicitly set by the RHS or, if none,
1763 * to the type named by the ``type`` call on the LHS. If the LHS has none
1764 * because it's a ``dom(...)`` LHS, an error is raised.
1766 * @arg {number|function} scoreOrCallback Can either be a static number,
1767 * generally 0 to 1 inclusive, or else a callback which takes the fnode
1768 * and returns such a number. If the callback returns a boolean, it is
1771 score(scoreOrCallback) {
1774 function getSubfactsFromNumber(fnode) {
1775 return { score: scoreOrCallback };
1778 function getSubfactsFromFunction(fnode) {
1779 let result = scoreOrCallback(fnode);
1780 if (typeof result === 'boolean') {
1781 // Case bools to numbers for convenience. Boolean features are
1782 // common. Don't cast other things, as it frustrates ruleset
1784 result = Number(result);
1786 return { score: result };
1789 if (typeof scoreOrCallback === 'number') {
1790 getSubfacts = getSubfactsFromNumber;
1792 getSubfacts = getSubfactsFromFunction;
1794 getSubfacts.possibleSubfacts = SCORE;
1795 getSubfacts.kind = 'score';
1797 return new this.constructor(this._calls.concat(getSubfacts), this._max, this._types);
1800 // Future: why not have an .element() method for completeness?
1802 // -------- Methods below this point are private to the framework. --------
1805 * Run all my props().type().note().score() stuff across a given fnode,
1806 * enforce my max() stuff, and return a fact ({element, type, score,
1807 * notes}) for incorporation into that fnode (or a different one, if
1808 * element is specified). Any of the 4 fact properties can be missing;
1809 * filling in defaults is a job for the caller.
1811 * @arg leftType The type the LHS takes in
1813 fact(fnode, leftType) {
1814 const doneKinds = new Set();
1816 let haveSubfacts = 0;
1817 for (let call of reversed(this._calls)) {
1818 // If we've already called a call of this kind, then forget it.
1819 if (!doneKinds.has(call.kind)) {
1820 doneKinds.add(call.kind);
1822 if (~haveSubfacts & call.possibleSubfacts) {
1823 // This call might provide a subfact we are missing.
1824 const newSubfacts = call(fnode);
1826 // We start with an empty object, so we're okay here.
1827 // eslint-disable-next-line guard-for-in
1828 for (let subfact in newSubfacts) {
1829 // A props() callback could insert arbitrary keys into
1830 // the result, but it shouldn't matter, because nothing
1831 // pays any attention to them.
1832 if (!result.hasOwnProperty(subfact)) {
1833 result[subfact] = newSubfacts[subfact];
1835 haveSubfacts |= SUBFACTS[subfact];
1840 this._checkAtMost(result);
1841 this._checkTypeIn(result, leftType);
1846 * Return a record describing the types I might emit (which means either to
1847 * add a type to a fnode or to output a fnode that already has that type).
1848 * {couldChangeType: whether I might add a type to the fnode,
1849 * possibleTypes: If couldChangeType, the types I might emit; empty set if
1850 * we cannot infer it. If not couldChangeType, undefined.}
1852 possibleEmissions() {
1853 // If there is a typeIn() constraint or there is a type() call to the
1854 // right of all props() calls, we have a constraint. We hunt for the
1855 // tightest constraint we can find, favoring a type() call because it
1856 // gives us a single type but then falling back to a typeIn().
1857 let couldChangeType = false;
1858 for (let call of reversed(this._calls)) {
1859 if (call.kind === 'props') {
1860 couldChangeType = true;
1862 } else if (call.kind === 'type') {
1863 return { couldChangeType: true, possibleTypes: new Set([call.type]) };
1866 return { couldChangeType, possibleTypes: this._types };
1871 constructor(key, through = (x) => x, allThrough = (x) => x) {
1873 this.callback = through;
1874 this.allCallback = allThrough;
1878 * Append ``.through`` to :func:`out` to run each :term:`fnode` emitted
1879 * from the LHS through an arbitrary function before returning it to the
1880 * containing program. Example::
1882 * out('titleLengths').through(fnode => fnode.noteFor('title').length)
1885 return new this.constructor(this.key, callback, this.allCallback);
1889 * Append ``.allThrough`` to :func:`out` to run the entire iterable of
1890 * emitted :term:`fnodes<fnode>` through an arbitrary function before
1891 * returning them to the containing program. Example::
1893 * out('sortedTitles').allThrough(domSort)
1895 allThrough(callback) {
1896 return new this.constructor(this.key, this.callback, callback);
1904 function props(callback) {
1905 return new Side({ method: 'props', args: [callback] });
1908 /** Constrain to an input type on the LHS, or apply a type on the RHS. */
1909 function type(theType) {
1910 return new Side({ method: 'type', args: [theType] });
1913 function note(callback) {
1914 return new Side({ method: 'note', args: [callback] });
1917 function score(scoreOrCallback) {
1918 return new Side({ method: 'score', args: [scoreOrCallback] });
1921 function atMost(score) {
1922 return new Side({ method: 'atMost', args: [score] });
1925 function typeIn(...types) {
1926 return new Side({ method: 'typeIn', args: types });
1930 * Pull nodes that conform to multiple conditions at once.
1932 * For example: ``and(type('title'), type('english'))``
1934 * Caveats: ``and`` supports only simple ``type`` calls as arguments for now,
1935 * and it may fire off more rules as prerequisites than strictly necessary.
1936 * ``not`` and ``or`` don't exist yet, but you can express ``or`` the long way
1937 * around by having 2 rules with identical RHSs.
1939 function and(...lhss) {
1940 return new Side({ method: 'and', args: lhss });
1944 * Experimental. For each :term:`fnode` from ``typeCallA``, find the closest
1945 * node from ``typeCallB``, and attach it as a note. The note is attached to
1946 * the type specified by the RHS, defaulting to the type of ``typeCallA``. If
1947 * no nodes are emitted from ``typeCallB``, do nothing.
1951 * nearest(type('image'), type('price'))
1953 * The score of the ``typeCallA`` can be added to the new type's score by using
1954 * :func:`conserveScore` (though this routine has since been removed)::
1956 * rule(nearest(type('image'), type('price')),
1957 * type('imageWithPrice').score(2).conserveScore())
1959 * Caveats: ``nearest`` supports only simple ``type`` calls as arguments ``a``
1960 * and ``b`` for now.
1962 * @arg distance {function} A function that takes 2 fnodes and returns a
1963 * numerical distance between them. Included options are :func:`distance`,
1964 * which is a weighted topological distance, and :func:`euclidean`, which
1965 * is a spatial distance.
1967 function nearest(typeCallA, typeCallB, distance = euclidean) {
1970 args: [typeCallA, typeCallB, distance],
1975 * A chain of calls that can be compiled into a Rhs or Lhs, depending on its
1976 * position in a Rule. This lets us use type() as a leading call for both RHSs
1977 * and LHSs. I would prefer to do this dynamically, but that wouldn't compile
1978 * down to old versions of ES.
1981 constructor(...calls) {
1982 // A "call" is like {method: 'dom', args: ['p.smoo']}.
1983 this._calls = calls;
1987 return this._and('max');
1990 bestCluster(options) {
1991 return this._and('bestCluster', options);
1995 return this._and('props', callback);
1999 return this._and('type', ...types);
2003 return this._and('note', callback);
2006 score(scoreOrCallback) {
2007 return this._and('score', scoreOrCallback);
2011 return this._and('atMost', score);
2015 return this._and('typeIn', ...types);
2019 return this._and('and', lhss);
2022 _and(method, ...args) {
2023 return new this.constructor(...this._calls.concat({ method, args }));
2027 return this._asSide(Lhs.fromFirstCall(this._calls[0]), this._calls.slice(1));
2031 return this._asSide(new InwardRhs(), this._calls);
2034 _asSide(side, calls) {
2035 for (let call of calls) {
2036 side = side[call.method](...call.args);
2042 return this._and('when', pred);
2047 * A wrapper around a DOM node, storing :term:`types<type>`,
2048 * :term:`scores<score>`, and :term:`notes<note>` that apply to it
2052 * @arg element The DOM element described by the fnode.
2053 * @arg ruleset The ruleset which created the fnode.
2055 constructor(element, ruleset) {
2056 if (element === undefined) {
2057 throw new Error("Someone tried to make a fnode without specifying the element they're talking about.");
2060 * The raw DOM element this fnode describes
2062 this.element = element;
2063 this._ruleset = ruleset;
2065 // A map of type => {score: number, note: anything}. `score` is always
2066 // present and defaults to 1. A note is set iff `note` is present and
2068 this._types = new Map();
2070 // Note: conserveScore() is temporarily absent in 3.0.
2072 // By default, a fnode has an independent score for each of its types.
2073 // However, a RHS can opt to conserve the score of an upstream type,
2074 // carrying it forward into another type. To avoid runaway scores in
2075 // the case that multiple rules choose to do this, we limit the
2076 // contribution of an upstream type's score to being multiplied in a
2077 // single time. In this set, we keep track of which upstream types'
2078 // scores have already been multiplied into each type. LHS fnode => Set
2079 // of types whose score for that node have been multiplied into this
2081 this._conservedScores = new Map();
2085 * Return whether the given type is one of the ones attached to the wrapped
2089 // Run type(theType) against the ruleset to make sure this doesn't
2090 // return false just because we haven't lazily run certain rules yet.
2091 this._computeType(type);
2092 return this._types.has(type);
2096 * Return the confidence, in the range (0, 1), that the fnode belongs to the
2097 * given type, 0 by default.
2100 this._computeType(type);
2102 this._ruleset.weightedScore(this.scoresSoFarFor(type)) + getDefault(this._ruleset.biases, type, () => 0)
2107 * Return the fnode's note for the given type, ``undefined`` if none.
2110 this._computeType(type);
2111 return this._noteSoFarFor(type);
2115 * Return whether this fnode has a note for the given type.
2117 * ``undefined`` is not considered a note and may be overwritten with
2121 this._computeType(type);
2122 return this._hasNoteSoFarFor(type);
2125 // -------- Methods below this point are private to the framework. --------
2128 * Return an iterable of the types tagged onto me by rules that have
2132 return this._types.keys();
2135 _noteSoFarFor(type) {
2136 return this._typeRecordForGetting(type).note;
2139 _hasNoteSoFarFor(type) {
2140 return this._noteSoFarFor(type) !== undefined;
2144 * Return the score thus far computed on me for a certain type. Doesn't
2145 * implicitly run any rules. If no score has yet been determined for the
2146 * given type, return undefined.
2148 scoresSoFarFor(type) {
2149 return this._typeRecordForGetting(type).score;
2153 * Add a given number to one of our per-type scores. Implicitly assign us
2154 * the given type. Keep track of which rule it resulted from so we can
2155 * later mess with the coeffs.
2157 addScoreFor(type, score, ruleName) {
2158 this._typeRecordForSetting(type).score.set(ruleName, score);
2162 * Set the note attached to one of our types. Implicitly assign us that
2163 * type if we don't have it already.
2165 setNoteFor(type, note) {
2166 if (this._hasNoteSoFarFor(type)) {
2167 if (note !== undefined) {
2169 `Someone (likely the right-hand side of a rule) tried to add a note of type ${type} to an element, but one of that type already exists. Overwriting notes is not allowed, since it would make the order of rules matter.`
2172 // else the incoming note is undefined and we already have the
2173 // type, so it's a no-op
2175 // Apply either a type and note or just a type (which means a note
2176 // that is undefined):
2177 this._typeRecordForSetting(type).note = note;
2182 * Return a score/note record for a type, creating it if it doesn't exist.
2184 _typeRecordForSetting(type) {
2185 return setDefault(this._types, type, () => ({ score: new Map() }));
2189 * Manifest a temporary type record for reading, working around the lack of
2190 * a .? operator in JS.
2192 _typeRecordForGetting(type) {
2193 return getDefault(this._types, type, () => ({ score: new Map() }));
2197 * Make sure any scores, notes, and type-tagging for the given type are
2198 * computed for my element.
2200 _computeType(theType) {
2201 if (!this._types.has(theType)) {
2202 // Prevent infinite recursion when an A->A rule looks at A's note in a callback.
2203 this._ruleset.get(type(theType));
2209 * Construct and return the proper type of rule class based on the
2210 * inwardness/outwardness of the RHS.
2212 * @arg lhs {Lhs} The left-hand side of the rule
2213 * @arg rhs {Rhs} The right-hand side of the rule
2214 * @arg options {object} Other, optional information about the rule.
2215 * Currently, the only recognized option is ``name``, which points to a
2216 * string that uniquely identifies this rule in a ruleset. The name
2217 * correlates this rule with one of the coefficients passed into
2218 * :func:`ruleset`. If no name is given, an identifier is assigned based on
2219 * the index of this rule in the ruleset, but that is, of course, brittle.
2221 function rule(lhs, rhs, options) {
2222 // Since out() is a valid call only on the RHS (unlike type()), we can take
2223 // a shortcut here: any outward RHS will already be an OutwardRhs; we don't
2224 // need to sidetrack it through being a Side. And OutwardRhs has an asRhs()
2225 // that just returns itself.
2226 if (typeof rhs === 'string') {
2229 return new (rhs instanceof OutwardRhs ? OutwardRule : InwardRule)(lhs, rhs, options);
2232 let nextRuleNumber = 0;
2233 function newInternalRuleName() {
2234 return '_' + nextRuleNumber++;
2238 * We place the in/out distinction in Rules because it determines whether the
2239 * RHS result is cached, and Rules are responsible for maintaining the rulewise
2240 * cache ruleset.ruleCache.
2244 constructor(lhs, rhs, options) {
2245 this.lhs = lhs.asLhs();
2246 this.rhs = rhs.asRhs();
2247 // TODO: Make auto-generated rule names be based on the out types of
2248 // the rules, e.g. _priceish_4. That way, adding rules for one type
2249 // won't make the coeffs misalign for another.
2250 this.name = (options ? options.name : undefined) || newInternalRuleName();
2254 * Return a NiceSet of the rules that this one shallowly depends on in the
2255 * given ruleset. In a BoundRuleset, this may include rules that have
2256 * already been executed.
2258 * Depend on emitters of any LHS type this rule finalizes. (See
2259 * _typesFinalized for a definition.) Depend on adders of any other LHS
2260 * types (because, after all, we need to know what nodes have that type in
2261 * order to find the set of LHS nodes). This works for simple rules and
2262 * complex ones like and().
2264 * Specific examples (where A is a type):
2265 * * A.max->* depends on anything emitting A.
2266 * * Even A.max->A depends on A emitters, because we have to have all the
2267 * scores factored in first. For example, what if we did
2268 * max(A)->score(.5)?
2269 * * A->A depends on anything adding A.
2270 * * A->(something other than A) depends on anything emitting A. (For
2271 * example, we need the A score finalized before we could transfer it to
2272 * B using conserveScore().)
2273 * * A->out() also depends on anything emitting A. Fnode methods aren't
2274 * smart enough to lazily run emitter rules as needed. We could make them
2275 * so if it was shown to be an advantage.
2277 prerequisites(ruleset) {
2278 // Optimization: we could cache the result of this when in a compiled (immutable) ruleset.
2280 // Extend prereqs with rules derived from each of the given types. If
2281 // no rules are found, raise an exception, as that indicates a
2282 // malformed ruleset.
2283 function extendOrThrow(prereqs, types, ruleGetter, verb) {
2284 for (let type of types) {
2285 const rules = ruleGetter(type);
2286 if (rules.length > 0) {
2287 prereqs.extend(rules);
2289 throw new Error(`No rule ${verb} the "${type}" type, but another rule needs it as input.`);
2294 const prereqs = new NiceSet();
2296 // Add finalized types:
2297 extendOrThrow(prereqs, this._typesFinalized(), (type) => ruleset.inwardRulesThatCouldEmit(type), 'emits');
2299 // Add mentioned types:
2300 // We could say this.lhs.typesMentioned().minus(typesFinalized) as an
2301 // optimization. But since types mentioned are a superset of types
2302 // finalized and rules adding are a subset of rules emitting, we get
2303 // the same result without.
2304 extendOrThrow(prereqs, this.lhs.typesMentioned(), (type) => ruleset.inwardRulesThatCouldAdd(type), 'adds');
2310 * Return the types that this rule finalizes.
2312 * To "finalize" a type means to make sure we're finished running all
2313 * possible rules that might change a node's score or notes w.r.t. a given
2314 * type. This is generally done because we're about to use those data for
2315 * something, like computing a new type's score or or an aggregate
2316 * function. Exhaustively, we're about to...
2317 * * change the type of the nodes or
2318 * * aggregate all nodes of a type
2320 * This base-class implementation just returns what aggregate functions
2321 * need, since that need spans inward and outward rules.
2323 * @return Set of types
2326 // Get the types that are fed to aggregate functions. Aggregate
2327 // functions are more demanding than a simple type() LHS. A type() LHS
2328 // itself does not finalize its nodes because the things it could do to
2329 // them without changing their type (adding notes, adding to score)
2330 // are immutable or commutative (respectively). Thus, we require a RHS
2331 // type change in order to require finalization of a simple type()
2332 // mention. A max(B), OTOH, is not commutative with other B->B rules
2333 // (imagine type(B).max()->score(.5)), so it must depend on B emitters
2334 // and thus finalize B. (This will have to be relaxed or rethought when
2335 // we do the max()/atMost() optimization. Perhaps we can delegate to
2336 // aggregate functions up in Rule.prerequisites() to ask what their
2337 // prereqs are. If they implement such an optimization, they can reply.
2338 // Otherwise, we can assume they are all the nodes of their type.)
2340 // TODO: Could arbitrary predicates (once we implement those) matter
2341 // too? Maybe it's not just aggregations.
2342 const type = this.lhs.aggregatedType();
2343 return type === undefined ? new NiceSet() : new NiceSet([type]);
2348 * A normal rule, whose results head back into the Fathom knowledgebase, to be
2349 * operated on by further rules.
2351 class InwardRule extends Rule {
2352 // TODO: On construct, complain about useless rules, like a dom() rule that
2353 // doesn't assign a type.
2356 * Return an iterable of the fnodes emitted by the RHS of this rule.
2357 * Side effect: update ruleset's store of fnodes, its accounting of which
2358 * rules are done executing, and its cache of results per type.
2361 if (ruleset.doneRules.has(this)) {
2364 'A bug in Fathom caused results() to be called on an inward rule twice. That could cause redundant score contributions, etc.'
2368 // For now, we consider most of what a LHS computes to be cheap, aside
2369 // from type() and type().max(), which are cached by their specialized
2371 const leftResults = this.lhs.fnodes(ruleset);
2372 // Avoid returning a single fnode more than once. LHSs uniquify
2373 // themselves, but the RHS can change the element it's talking
2374 // about and thus end up with dupes.
2375 const returnedFnodes = new Set();
2377 // Merge facts into fnodes:
2379 // leftResult can be either a fnode or a {fnode, rhsTransformer} pair.
2380 function updateFnode(leftResult) {
2381 const leftType = self.lhs.guaranteedType();
2382 // Get a fnode and a RHS transformer, whether a plain fnode is
2383 // returned or a {fnode, rhsTransformer} pair:
2384 const { fnode: leftFnode = leftResult, rhsTransformer = identity } = leftResult;
2385 // Grab the fact from the RHS, and run the LHS's optional
2386 // transformer over it to pick up anything special it wants to
2388 const fact = rhsTransformer(self.rhs.fact(leftFnode, leftType));
2389 self.lhs.checkFact(fact);
2390 const rightFnode = ruleset.fnodeForElement(fact.element || leftFnode.element);
2391 // If the RHS doesn't specify a type, default to the
2392 // type of the LHS, if any:
2393 const rightType = fact.type || self.lhs.guaranteedType();
2394 if (fact.score !== undefined) {
2395 if (rightType !== undefined) {
2396 rightFnode.addScoreFor(rightType, fact.score, self.name);
2399 `The right-hand side of a rule specified a score (${fact.score}) with neither an explicit type nor one we could infer from the left-hand side.`
2403 if (fact.type !== undefined || fact.note !== undefined) {
2404 // There's a reason to call setNoteFor.
2405 if (rightType === undefined) {
2407 `The right-hand side of a rule specified a note (${fact.note}) with neither an explicit type nor one we could infer from the left-hand side. Notes are per-type, per-node, so that's a problem.`
2410 rightFnode.setNoteFor(rightType, fact.note);
2413 returnedFnodes.add(rightFnode);
2418 // Update ruleset lookup tables.
2419 // First, mark this rule as done:
2420 ruleset.doneRules.add(this);
2421 // Then, stick each fnode in typeCache under all applicable types.
2422 // Optimization: we really only need to loop over the types
2423 // this rule can possibly add.
2424 for (let fnode of returnedFnodes) {
2425 for (let type of fnode.typesSoFar()) {
2426 setDefault(ruleset.typeCache, type, () => new Set()).add(fnode);
2429 return returnedFnodes.values();
2433 * Return a Set of the types that could be emitted back into the system.
2434 * To emit a type means to either to add it to a fnode emitted from the RHS
2435 * or to leave it on such a fnode where it already exists.
2437 typesItCouldEmit() {
2438 const rhs = this.rhs.possibleEmissions();
2439 if (!rhs.couldChangeType && this.lhs.guaranteedType() !== undefined) {
2440 // It's a b -> b rule.
2441 return new Set([this.lhs.guaranteedType()]);
2442 } else if (rhs.possibleTypes.size > 0) {
2443 // We can prove the type emission from the RHS alone.
2444 return rhs.possibleTypes;
2447 'Could not determine the emitted type of a rule because its right-hand side calls props() without calling typeIn().'
2453 * Return a Set of types I could add to fnodes I output (where the fnodes
2454 * did not already have them).
2457 const ret = new Set(this.typesItCouldEmit());
2458 ret.delete(this.lhs.guaranteedType());
2463 * Add the types we could change to the superclass's result.
2467 function typesThatCouldChange() {
2468 const ret = new NiceSet();
2470 // Get types that could change:
2471 const emissions = self.rhs.possibleEmissions();
2472 if (emissions.couldChangeType) {
2473 // Get the possible guaranteed combinations of types on the LHS
2474 // (taking just this LHS into account). For each combo, if the RHS
2475 // adds a type that's not in the combo, the types in the combo get
2476 // unioned into ret.
2477 for (let combo of self.lhs.possibleTypeCombinations()) {
2478 for (let rhsType of emissions.possibleTypes) {
2479 if (!combo.has(rhsType)) {
2486 // Optimization: the possible combos could be later expanded to be
2487 // informed by earlier rules which add the types mentioned in the LHS.
2488 // If the only way for something to get B is to have Q first, then we
2489 // can add Q to each combo and end up with fewer types finalized. Would
2490 // this imply the existence of a Q->B->Q cycle and thus be impossible?
2491 // Think about it. If we do this, we can centralize that logic here,
2492 // rather than repeating it in all the Lhs subclasses).
2496 return typesThatCouldChange().extend(super._typesFinalized());
2501 * A rule whose RHS is an out(). This represents a final goal of a ruleset.
2502 * Its results go out into the world, not inward back into the Fathom
2505 class OutwardRule extends Rule {
2507 * Compute the whole thing, including any .through() and .allThrough().
2508 * Do not mark me done in ruleset.doneRules; out rules are never marked as
2509 * done so they can be requested many times without having to cache their
2510 * (potentially big, since they aren't necessarily fnodes?) results. (We
2511 * can add caching later if it proves beneficial.)
2515 * From a LHS's ``{fnode, rhsTransform}`` object or plain fnode, pick off just
2516 * the fnode and return it.
2518 function justFnode(fnodeOrStruct) {
2519 return fnodeOrStruct instanceof Fnode ? fnodeOrStruct : fnodeOrStruct.fnode;
2522 return this.rhs.allCallback(map(this.rhs.callback, map(justFnode, this.lhs.fnodes(ruleset))));
2526 * @return the key under which the output of this rule will be available
2529 return this.rhs.key;
2533 * OutwardRules finalize all types mentioned.
2536 return this.lhs.typesMentioned().extend(super._typesFinalized());
2541 * A shortcut for creating a new :class:`Ruleset`, for symmetry with
2544 function ruleset(rules, coeffs = [], biases = []) {
2545 return new Ruleset(rules, coeffs, biases);
2549 * An unbound ruleset. When you bind it by calling :func:`~Ruleset.against()`,
2550 * the resulting :class:`BoundRuleset` will be immutable.
2554 * @arg rules {Array} Rules returned from :func:`rule`
2555 * @arg coeffs {Map} A map of rule names to numerical weights, typically
2556 * returned by the :doc:`trainer<training>`. Example:
2557 * ``[['someRuleName', 5.04], ...]``. If not given, coefficients
2559 * @arg biases {object} A map of type names to neural-net biases. These
2560 * enable accurate confidence estimates. Example: ``[['someType',
2561 * -2.08], ...]``. If absent, biases default to 0.
2563 constructor(rules, coeffs = [], biases = []) {
2565 this._outRules = new Map(); // key -> rule
2566 this._rulesThatCouldEmit = new Map(); // type -> [rules]
2567 this._rulesThatCouldAdd = new Map(); // type -> [rules]
2568 // Private to the framework:
2569 this._coeffs = new Map(coeffs); // rule name => coefficient
2570 this.biases = new Map(biases); // type name => bias
2572 // Separate rules into out ones and in ones, and sock them away. We do
2573 // this here so mistakes raise errors early.
2574 for (let rule of rules) {
2575 if (rule instanceof InwardRule) {
2576 this._inRules.push(rule);
2578 // Keep track of what inward rules can emit or add:
2579 // TODO: Combine these hashes for space efficiency:
2580 const emittedTypes = rule.typesItCouldEmit();
2581 for (let type of emittedTypes) {
2582 setDefault(this._rulesThatCouldEmit, type, () => []).push(rule);
2584 for (let type of rule.typesItCouldAdd()) {
2585 setDefault(this._rulesThatCouldAdd, type, () => []).push(rule);
2587 } else if (rule instanceof OutwardRule) {
2588 this._outRules.set(rule.key(), rule);
2590 throw new Error(`This element of ruleset()'s first param wasn't a rule: ${rule}`);
2596 * Commit this ruleset to running against a specific DOM tree or subtree.
2598 * When run against a subtree, the root of the subtree is not considered as
2601 * This doesn't actually modify the Ruleset but rather returns a fresh
2602 * :class:`BoundRuleset`, which contains caches and other stateful, per-DOM
2606 return new BoundRuleset(
2610 this._rulesThatCouldEmit,
2611 this._rulesThatCouldAdd,
2618 * Return all the rules (both inward and outward) that make up this ruleset.
2620 * From this, you can construct another ruleset like this one but with your
2624 return Array.from([...this._inRules, ...this._outRules.values()]);
2629 * A ruleset that is earmarked to analyze a certain DOM
2631 * Carries a cache of rule results on that DOM. Typically comes from
2632 * :meth:`~Ruleset.against`.
2634 class BoundRuleset {
2636 * @arg inRules {Array} Non-out() rules
2637 * @arg outRules {Map} Output key -> out() rule
2639 constructor(doc, inRules, outRules, rulesThatCouldEmit, rulesThatCouldAdd, coeffs, biases) {
2641 this._inRules = inRules;
2642 this._outRules = outRules;
2643 this._rulesThatCouldEmit = rulesThatCouldEmit;
2644 this._rulesThatCouldAdd = rulesThatCouldAdd;
2645 this._coeffs = coeffs;
2647 // Private, for the use of only helper classes:
2648 this.biases = biases;
2649 this._clearCaches();
2650 this.elementCache = new WeakMap(); // DOM element => fnode about it
2651 this.doneRules = new Set(); // InwardRules that have been executed. OutwardRules can be executed more than once because they don't change any fnodes and are thus idempotent.
2655 * Change my coefficients and biases after construction.
2657 * @arg coeffs See the :class:`Ruleset` constructor.
2658 * @arg biases See the :class:`Ruleset` constructor.
2660 setCoeffsAndBiases(coeffs, biases = []) {
2661 // Destructuring assignment doesn't make it through rollup properly
2662 // (https://github.com/rollup/rollup-plugin-commonjs/issues/358):
2663 this._coeffs = new Map(coeffs);
2664 this.biases = new Map(biases);
2665 this._clearCaches();
2669 * Clear the typeCache and maxCache, usually in the wake of changing
2670 * ``this._coeffs``, because both of thise depend on weighted scores.
2673 this.maxCache = new Map(); // type => Array of max fnode (or fnodes, if tied) of this type
2674 this.typeCache = new Map(); // type => Set of all fnodes of this type found so far. (The dependency resolution during execution ensures that individual types will be comprehensive just in time.)
2678 * Return an array of zero or more fnodes.
2679 * @arg thing {string|Lhs|Node} Can be
2681 * (1) A string which matches up with an "out" rule in the ruleset.
2682 * If the out rule uses through(), the results of through's
2683 * callback (which might not be fnodes) will be returned.
2684 * (2) An arbitrary LHS which we calculate and return the results of.
2685 * (3) A DOM node, for which we will return the corresponding fnode.
2687 * Results are cached for cases (1) and (3).
2690 if (typeof thing === 'string') {
2691 if (this._outRules.has(thing)) {
2692 return Array.from(this._execute(this._outRules.get(thing)));
2694 throw new Error(`There is no out() rule with key "${thing}".`);
2696 } else if (isDomElement(thing)) {
2697 // Return the fnode and let it run type(foo) on demand, as people
2698 // ask it things like scoreFor(foo).
2699 return this.fnodeForElement(thing);
2700 } else if (thing.asLhs !== undefined) {
2701 // Make a temporary out rule, and run it. This may add things to
2702 // the ruleset's cache, but that's fine: it doesn't change any
2703 // future results; it just might make them faster. For example, if
2704 // you ask for .get(type('smoo')) twice, the second time will be a
2706 const outRule = rule(thing, out(Symbol('outKey')));
2707 return Array.from(this._execute(outRule));
2710 'ruleset.get() expects a string, an expression like on the left-hand side of a rule, or a DOM node.'
2716 * Return the weighted sum of the per-rule, per-type scores from a fnode.
2718 * @arg mapOfScores a Map of rule name to the [0, 1] score it computed for
2719 * the type in question
2721 weightedScore(mapOfScores) {
2723 for (const [name, score] of mapOfScores) {
2724 total += score * getDefault(this._coeffs, name, () => 1);
2729 // Provide an opaque context object to be made available to all ranker
2731 // context (object) {
2732 // self.context = object;
2735 // -------- Methods below this point are private to the framework. --------
2738 * Return all the thus-far-unexecuted rules that will have to run to run
2739 * the requested rule, in the form of Map(prereq: [rulesItIsNeededBy]).
2741 _prerequisitesTo(rule, undonePrereqs = new Map()) {
2742 for (let prereq of rule.prerequisites(this)) {
2743 if (!this.doneRules.has(prereq)) {
2744 // prereq is not already run. (If it were, we wouldn't care
2745 // about adding it to the graph.)
2746 const alreadyAdded = undonePrereqs.has(prereq);
2747 setDefault(undonePrereqs, prereq, () => []).push(rule);
2749 // alreadyAdded means we've already computed the prereqs of
2750 // this prereq and added them to undonePrereqs. So, now
2751 // that we've hooked up the rule to this prereq in the
2752 // graph, we can stop. But, if we haven't, then...
2753 if (!alreadyAdded) {
2754 this._prerequisitesTo(prereq, undonePrereqs);
2758 return undonePrereqs;
2762 * Run the given rule (and its dependencies, in the proper order), and
2763 * return its results.
2765 * The caller is responsible for ensuring that _execute() is not called
2766 * more than once for a given InwardRule, lest non-idempotent
2767 * transformations, like score contributions, be applied to fnodes more
2770 * The basic idea is to sort rules in topological order (according to input
2771 * and output types) and then run them. On top of that, we do some
2772 * optimizations. We keep a cache of results by type (whether partial or
2773 * comprehensive--either way, the topology ensures that any
2774 * non-comprehensive typeCache entry is made comprehensive before another
2775 * rule needs it). And we prune our search for prerequisite rules at the
2776 * first encountered already-executed rule.
2779 const prereqs = this._prerequisitesTo(rule);
2782 sorted = [rule].concat(toposort(prereqs.keys(), (prereq) => prereqs.get(prereq)));
2784 if (exc instanceof CycleError) {
2785 throw new CycleError('There is a cyclic dependency in the ruleset.');
2791 for (let eachRule of reversed(sorted)) {
2792 // Sock each set of results away in this.typeCache:
2793 fnodes = eachRule.results(this);
2795 return Array.from(fnodes);
2798 /** @return {Rule[]} */
2799 inwardRulesThatCouldEmit(type) {
2800 return getDefault(this._rulesThatCouldEmit, type, () => []);
2803 /** @return {Rule[]} */
2804 inwardRulesThatCouldAdd(type) {
2805 return getDefault(this._rulesThatCouldAdd, type, () => []);
2809 * @return the Fathom node that describes the given DOM element. This does
2810 * not trigger any execution, so the result may be incomplete.
2812 fnodeForElement(element) {
2813 return setDefault(this.elementCache, element, () => new Fnode(element, this));
2817 /* This Source Code Form is subject to the terms of the Mozilla Public
2818 * License, v. 2.0. If a copy of the MPL was not distributed with this
2819 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
2821 const version = '3.7.3';
2827 clusters$1 as clusters,
2841 utilsForFrontend as utils,