1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 ChromeUtils.defineESModuleGetters(lazy, {
8 FilterAdult: "resource:///modules/FilterAdult.sys.mjs",
9 UrlbarUtils: "resource:///modules/UrlbarUtils.sys.mjs",
12 ChromeUtils.defineLazyGetter(lazy, "logConsole", function () {
13 return console.createInstance({
14 prefix: "InteractionsBlocklist",
15 maxLogLevel: Services.prefs.getBoolPref(
16 "browser.places.interactions.log",
24 // A blocklist of regular expressions. Maps base hostnames to a list of regular
25 // expressions for URLs with that base hostname. In this context, "base
26 // hostname" means the hostname without any subdomains or a public suffix. For
27 // example, the base hostname for "https://www.maps.google.com/a/place" is
28 // "google". We do this mapping to improve performance; otherwise we'd have to
29 // check all URLs against a long list of regular expressions. The regexes are
30 // defined as escaped strings so that we build them lazily.
31 // We may want to migrate this list to Remote Settings in the future.
32 let HOST_BLOCKLIST = {
35 // XXX: Used alone this could produce false positives where an auth0 URL
36 // appears after another valid domain and TLD, but since we limit this to
37 // the auth0 hostname those occurrences will be filtered out.
38 "^https:\\/\\/.*\\.auth0\\.com\\/login",
42 "^(https?:\\/\\/)?(www\\.)?baidu\\.com\\/s.*(\\?|&)wd=.*",
46 "^(https?:\\/\\/)?(www\\.)?bing\\.com\\/search.*(\\?|&)q=.*",
50 "^(https?:\\/\\/)?(www\\.)?duckduckgo\\.com\\/.*(\\?|&)q=.*",
54 "^(https?:\\/\\/)?(www\\.)?google\\.(\\w|\\.){2,}\\/search.*(\\?|&)q=.*",
56 "^https:\\/\\/accounts\\.google\\.com\\/o\\/oauth2\\/v2\\/auth",
57 "^https:\\/\\/accounts\\.google\\.com\\/signin\\/oauth\\/consent",
61 "^https:\\/\\/login\\.microsoftonline\\.com\\/common\\/oauth2\\/v2\\.0\\/authorize",
65 "^(https?:\\/\\/)?(www\\.)?yandex\\.(\\w|\\.){2,}\\/search.*(\\?|&)text=.*",
68 // Zoom meeting interstitial
69 "^(https?:\\/\\/)?(www\\.)?.*\\.zoom\\.us\\/j\\/\\d+",
73 HOST_BLOCKLIST = new Proxy(HOST_BLOCKLIST, {
74 get(target, property) {
75 let regexes = target[property];
76 if (!regexes || !Array.isArray(regexes)) {
80 for (let i = 0; i < regexes.length; i++) {
81 let regex = regexes[i];
82 if (typeof regex === "string") {
83 regex = new RegExp(regex, "i");
87 throw new Error("Blocklist contains invalid regex.");
96 * A class that maintains a blocklist of URLs. The class exposes a method to
97 * check if a particular URL is contained on the blocklist.
99 class _InteractionsBlocklist {
101 // Load custom blocklist items from pref.
103 let customBlocklist = JSON.parse(
104 Services.prefs.getStringPref(
105 "places.interactions.customBlocklist",
109 if (!Array.isArray(customBlocklist)) {
112 let parsedBlocklist = customBlocklist.map(
// Entries saved by this module are RegExp#toString() output, i.e.
// "/source/flags". Unwrap that form so the pattern and its flags round-trip
// instead of the slashes and flags being compiled into the pattern itself.
// Any other string is still treated as a plain pattern, as before.
regexStr => {
  const literal = /^\/(.+)\/([dgimsuvy]*)$/.exec(regexStr);
  return literal ? new RegExp(literal[1], literal[2]) : new RegExp(regexStr);
}
115 HOST_BLOCKLIST["*"] = parsedBlocklist;
117 lazy.logConsole.warn("places.interactions.customBlocklist is corrupted.");
122 * Only certain urls can be added as Interactions, either manually or
125 * @returns {Map} A Map keyed by protocol, for each protocol an object may
126 * define stricter requirements, like extension.
128 get urlRequirements() {
132 ["file:", { extension: "pdf" }],
137 * Whether to record interactions for a given URL.
138 * The rules are defined in InteractionsBlocklist.urlRequirements.
140 * @param {string|URL|nsIURI} url The URL to check.
141 * @returns {boolean} whether the url can be recorded.
144 let protocol, pathname;
145 if (typeof url == "string") {
148 if (url instanceof Ci.nsIURI) {
149 protocol = url.scheme + ":";
150 pathname = url.filePath;
152 protocol = url.protocol;
153 pathname = url.pathname;
155 let requirements = InteractionsBlocklist.urlRequirements.get(protocol);
158 (!requirements.extension || pathname.endsWith(requirements.extension))
163 * Checks a URL against a blocklist of URLs. If the URL is blocklisted, we
164 * should not record an interaction.
166 * @param {string} urlToCheck
167 * The URL we are looking for on the blocklist.
169 * True if `urlToCheck` is on a blocklist. False otherwise.
171 isUrlBlocklisted(urlToCheck) {
172 if (lazy.FilterAdult.isAdultUrl(urlToCheck)) {
176 if (!this.canRecordUrl(urlToCheck)) {
180 // First, find the URL's base host: the hostname without any subdomains or a
184 url = new URL(urlToCheck);
189 lazy.logConsole.warn(
// Report the caller's input: `url` is only assigned once `new URL()` has
// succeeded, so interpolating it here would log "undefined".
`Invalid URL passed to InteractionsBlocklist.isUrlBlocklisted: ${urlToCheck}`
195 if (url.protocol == "file:") {
199 let hostWithoutSuffix = lazy.UrlbarUtils.stripPublicSuffixFromHost(
202 let [hostWithSubdomains] = lazy.UrlbarUtils.stripPrefixAndTrim(
206 trimTrailingDot: true,
209 let baseHost = hostWithSubdomains.substring(
210 hostWithSubdomains.lastIndexOf(".") + 1
212 // Then fetch blocked regexes for that baseHost and compare them to the full
213 // URL. Also check the URL against the custom blocklist.
// Build a fresh list for this lookup (host-specific regexes followed by the
// custom "*" regexes) instead of pushing onto whatever array the blocklist
// lookup returned, so a stored blocklist entry is never grown as a side
// effect of checking a URL.
// Hostnames are not locale-sensitive text, so lower-case without a locale.
let regexes = [
  ...(HOST_BLOCKLIST[baseHost.toLowerCase()] || []),
  ...(HOST_BLOCKLIST["*"] || []),
];
220 return regexes.some(r => r.test(url.href));
224 * Adds a regex to HOST_BLOCKLIST. Since we can't parse the base host from
225 * the regex, we add it to a list of wildcard regexes. All URLs are checked
226 * against these wildcard regexes. Currently only exposed for tests and use in
227 * the console. In the future we could hook this up to a UI component.
229 * @param {string|RegExp} regexToAdd
230 * The regular expression to add to our blocklist.
232 addRegexToBlocklist(regexToAdd) {
235 regex = new RegExp(regexToAdd, "i");
// Log through the module-level lazy console. No `logConsole` member is
// visible on this class, so `this.logConsole.warn` would throw a TypeError
// and hide the original regex error.
lazy.logConsole.warn("Invalid regex passed to addRegexToBlocklist.");
241 if (!HOST_BLOCKLIST["*"]) {
242 HOST_BLOCKLIST["*"] = [];
244 HOST_BLOCKLIST["*"].push(regex);
245 Services.prefs.setStringPref(
246 "places.interactions.customBlocklist",
247 JSON.stringify(HOST_BLOCKLIST["*"].map(reg => reg.toString()))
252 * Removes a regex from HOST_BLOCKLIST. If `regexToRemove` is not in the
253 * blocklist, this is a no-op. Currently only exposed for tests and use in the
254 * console. In the future we could hook this up to a UI component.
256 * @param {string|RegExp} regexToRemove
257 * The regular expression to remove from our blocklist.
259 removeRegexFromBlocklist(regexToRemove) {
262 regex = new RegExp(regexToRemove, "i");
// Log through the module-level lazy console. No `logConsole` member is
// visible on this class, so `this.logConsole.warn` would throw a TypeError.
// The message names this method rather than addRegexToBlocklist.
lazy.logConsole.warn("Invalid regex passed to removeRegexFromBlocklist.");
268 if (!HOST_BLOCKLIST["*"] || !Array.isArray(HOST_BLOCKLIST["*"])) {
271 HOST_BLOCKLIST["*"] = HOST_BLOCKLIST["*"].filter(
272 curr => curr.source != regex.source
274 Services.prefs.setStringPref(
275 "places.interactions.customBlocklist",
276 JSON.stringify(HOST_BLOCKLIST["*"].map(reg => reg.toString()))
281 export const InteractionsBlocklist = new _InteractionsBlocklist();