1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 ChromeUtils.defineESModuleGetters(lazy, {
9 "resource://gre/modules/components-utils/JsonSchemaValidator.sys.mjs",
10 OpenGraphPageData: "resource:///modules/pagedata/OpenGraphPageData.sys.mjs",
11 SchemaOrgPageData: "resource:///modules/pagedata/SchemaOrgPageData.sys.mjs",
12 TwitterPageData: "resource:///modules/pagedata/TwitterPageData.sys.mjs",
// Registers a lazy getter named "logConsole" on `lazy`; the supplied function
// builds the value with console.createInstance().
15 ChromeUtils.defineLazyGetter(lazy, "logConsole", function () {
16 return console.createInstance({
// maxLogLevel depends on the boolean pref "browser.pagedata.log", read with a
// default of false. The rest of the expression is not visible in this excerpt.
18 maxLogLevel: Services.prefs.getBoolPref("browser.pagedata.log", false)
25 * The list of page data collectors. These should be sorted in order of
26 * specificity; if the same piece of data is provided by two collectors then the
29 * Collectors must provide a `collect` function which will be passed the
30 * document object and should return the PageData structure. The function may be
31 * asynchronous if needed.
33 * The data returned need not be valid; collectors should return whatever they
34 * can and then we drop anything that is invalid once all data is joined.
// Registers a lazy getter named "DATA_COLLECTORS" on `lazy`. The list order is
// SchemaOrgPageData, OpenGraphPageData, TwitterPageData; each entry is itself
// read through `lazy`.
36 ChromeUtils.defineLazyGetter(lazy, "DATA_COLLECTORS", function () {
37 return [lazy.SchemaOrgPageData, lazy.OpenGraphPageData, lazy.TwitterPageData];
// Module-level cache of loaded schemas, keyed by the schema name passed to
// loadSchema().
40 let SCHEMAS = new Map();
43 * Loads the schema for the given name.
45 * @param {string} schemaName
46 * The name of the schema to load.
// Loads the JSON schema with the given name, using the SCHEMAS map as a cache.
// NOTE: some lines of this function are not visible in this excerpt.
50 async function loadSchema(schemaName) {
// Cache hit: return the schema stored by an earlier call.
51 if (SCHEMAS.has(schemaName)) {
52 return SCHEMAS.get(schemaName);
// The schema file name is the lower-cased schema name.
// NOTE(review): toLocaleLowerCase() is locale-sensitive (e.g. "I" maps to a
// dotless i under a Turkish locale); toLowerCase() may be the safer choice when
// building a URL. Confirm before changing.
55 let url = `chrome://browser/content/pagedata/schemas/${schemaName.toLocaleLowerCase()}.schema.json`;
56 let response = await fetch(url);
// Presumably thrown only when the response is not OK; the guarding condition
// is not visible in this excerpt.
58 throw new Error(`Failed to load schema: ${response.statusText}`);
// Parse the response body as JSON and cache it under the caller-supplied name.
61 let schema = await response.json();
62 SCHEMAS.set(schemaName, schema);
67 * Validates the data using the schema with the given name.
69 * @param {string} schemaName
70 * The name of the schema to validate against.
71 * @param {object} data
72 * The data to validate.
// Validates `data` against the schema named `schemaName` using
// lazy.JsonSchemaValidator. How `result` is handled is not visible in this
// excerpt.
74 async function validateData(schemaName, data) {
// The name is lower-cased before the lookup, so the lower-cased form is the
// key loadSchema() caches under.
// NOTE(review): loadSchema() lower-cases the name again when building the URL,
// and toLocaleLowerCase() is locale-sensitive. Confirm whether toLowerCase()
// is intended.
75 let schema = await loadSchema(schemaName.toLocaleLowerCase());
77 let result = lazy.JsonSchemaValidator.validate(data, schema, {
78 allowExplicitUndefinedProperties: true,
79 // Allowed for future expansion of the schema.
80 allowAdditionalProperties: true,
89 * A shared API that can be used in parent or child processes
// Exported object holding the data type enumeration and the helper methods
// that validate, merge and collect page data.
91 export const PageDataSchema = {
92 // Enumeration of data types. The keys must match the schema name.
// Frozen so the enumeration's entries cannot be added, removed or reassigned.
93 DATA_TYPE: Object.freeze({
94 // Note that 1 and 2 were used as types in earlier versions and should not be used here.
103 * Gets the data type name.
105 * @param {DATA_TYPE} type
106 * The data type from the DATA_TYPE enumeration
108 * @returns {string | null} The name for the type or null if not found.
// Iterates over the [name, value] pairs of the DATA_TYPE enumeration.
// Presumably this is the lookup loop of nameForType(); the method header and
// the loop body are not visible in this excerpt.
111 for (let [name, value] of Object.entries(this.DATA_TYPE)) {
121 * Asynchronously validates some page data against the expected schema. Throws
122 * an exception if validation fails.
124 * @param {DATA_TYPE} type
125 * The data type from the DATA_TYPE enumeration
126 * @param {object} data
// Resolves the enumeration value to its name via nameForType(), then delegates
// to the module-level validateData() function.
129 async validateData(type, data) {
130 let name = this.nameForType(type);
// Presumably guarded by a check that no name was found; the guard is not
// visible in this excerpt.
133 throw new Error(`Unknown data type ${type}`);
// Awaited so that a rejection from validation rejects this method's promise.
136 await validateData(name, data);
140 * Asynchronously validates an entire PageData structure. Any invalid or
141 * unknown data types are dropped.
143 * @param {PageData} pageData
146 * @returns {PageData} The validated page data structure
// Validates the general (top-level) fields of a PageData structure and then
// each typed entry under its `data` map. Parts of this method are not visible
// in this excerpt.
148 async validatePageData(pageData) {
// Separate the per-type map (`data`, defaulting to an empty object) from the
// remaining general fields.
149 let { data: dataMap = {}, ...general } = pageData;
// The general fields are validated against the schema named "general".
151 await validateData("general", general);
// `type` is a property key of dataMap, so Object.entries yields it as a string.
155 for (let [type, data] of Object.entries(dataMap)) {
156 let name = this.nameForType(type);
157 // Ignore unknown types here.
163 await validateData(name, data);
// Entries are copied into validData under their original type key.
165 validData[type] = data;
167 // Invalid data is dropped.
178 * Adds new page data into an existing data set. Any existing data is not
181 * @param {PageData} existingPageData
182 * The existing page data
183 * @param {PageData} newPageData
186 * @returns {PageData} The joined data.
// Merges two PageData structures. Where both supply a value for the same
// field, the value from existingPageData is kept.
188 coalescePageData(existingPageData, newPageData) {
189 // Split out the general data from the map of specific data.
190 let { data: existingMap = {}, ...existingGeneral } = existingPageData;
191 let { data: newMap = {}, ...newGeneral } = newPageData;
// Copy the existing general fields over the new ones so existing values win.
// newGeneral is a fresh object created by the rest destructuring above, so
// newPageData itself is not modified here.
193 Object.assign(newGeneral, existingGeneral);
// For every type present in the existing data: when the new data has the same
// type, merge field by field with existing fields overriding new ones (later
// Object.assign sources win); otherwise keep the existing entry as it is.
196 for (let [type, data] of Object.entries(existingMap)) {
197 if (type in newMap) {
198 dataMap[type] = Object.assign({}, newMap[type], data);
200 dataMap[type] = data;
// Add the types that appear only in the new data.
204 for (let [type, data] of Object.entries(newMap)) {
205 if (!(type in dataMap)) {
206 dataMap[type] = data;
217 * Collects page data from a DOM document.
219 * @param {Document} document
220 * The DOM document to collect data from
222 * @returns {Promise<PageData | null>} The data collected or null in case of
// Runs every collector in lazy.DATA_COLLECTORS against the document, merges
// their results and validates the merged structure. Parts of this method are
// not visible in this excerpt.
225 async collectPageData(document) {
226 lazy.logConsole.debug("Starting collection", document.documentURI);
// Start all collectors first; their promises are awaited together below.
228 let pending = lazy.DATA_COLLECTORS.map(async collector => {
230 return await collector.collect(document);
// Presumably inside a catch block so that one failing collector is logged
// rather than rejecting the whole batch; the surrounding structure is not
// visible in this excerpt.
232 lazy.logConsole.error("Error collecting page data", e);
237 let pageDataList = await Promise.all(pending);
// Fold the results in list order, seeded with an object that includes the
// document URL (other seed fields are not visible here). coalescePageData
// keeps existing values, so results earlier in the list take precedence over
// later ones.
239 let pageData = pageDataList.reduce(PageDataSchema.coalescePageData, {
241 url: document.documentURI,
245 return this.validatePageData(pageData);
// Presumably reached from a catch block when collection or validation throws
// (structure not visible in this excerpt).
247 lazy.logConsole.error("Failed to collect valid page data", e);