packages/shared/lib/sanitize/escape.ts

   1 /*
   2  * This is valid
   3  * - background:&#117;r&#108;(
   4  * - background:&#117;r&#108;(
   5  * - background:url&lpar;
   6  * - etc.
   7  */
   8 const CSS_URL = '((url|image-set)(\\(|&(#40|#x00028|lpar);))';
   9 const REGEXP_URL_ATTR = new RegExp(CSS_URL, 'gi');
  10
  11 const REGEXP_HEIGHT_PERCENTAGE = /((?:min-|max-|line-)?height)\s*:\s*([\d.,]+%)/gi;
  12 const REGEXP_POSITION_ABSOLUTE = /position\s*:\s*absolute/gi;
  13 const REGEXP_MEDIA_DARK_STYLE_2 = /Color-scheme/gi;
  14
  15 const HTML_ESCAPES: [search: string, replace: string][] = [
  16     ['&', '&amp;'],
  17     ['<', '&lt;'],
  18     ['>', '&gt;'],
  19     ['"', '&quot;'],
  20     ["'", '&#39;'],
  21 ];
  22
  23 const HTML_UNESCAPES: [search: string, replace: string][] = HTML_ESCAPES.map(([a, b]) => [b, a]);
  24
  25 export const escape = (string: string) => {
  26     HTML_ESCAPES.forEach(([search, replace]) => {
  27         string = string.replaceAll(search, replace);
  28     });
  29
  30     return string;
  31 };
  32
  33 export const unescape = (string: string) => {
  34     HTML_UNESCAPES.forEach(([search, replace]) => {
  35         string = string.replaceAll(search, replace);
  36     });
  37
  38     return string;
  39 };
  40
  41 /**
  42  * Unescape a string in hex or octal encoding.
  43  * See https://www.w3.org/International/questions/qa-escapes#css_other for all possible cases.
  44  */
  45 export const unescapeCSSEncoding = (str: string) => {
  46     // Regexp declared inside the function to reset its state (because of the global flag).
  47     // cf https://stackoverflow.com/questions/1520800/why-does-a-regexp-with-global-flag-give-wrong-results
  48     const UNESCAPE_CSS_ESCAPES_REGEX = /\\([0-9A-Fa-f]{1,6}) ?/g;
  49     const UNESCAPE_HTML_DEC_REGEX = /&#(\d+)(;|(?=[^\d;]))/g;
  50     const UNESCAPE_HTML_HEX_REGEX = /&#x([0-9A-Fa-f]+)(;|(?=[^\d;]))/g;
  51     const OTHER_ESC = /\\(.)/g;
  52
  53     const handleEscape = (radix: number) => (ignored: any, val: string) => {
  54         try {
  55             return String.fromCodePoint(Number.parseInt(val, radix));
  56         } catch {
  57             // Unescape regexps have some limitations, for those rare situations, fromCodePoint can throw
  58             // One real found is: `font-family:\2018Calibri`
  59             return '';
  60         }
  61     };
  62
  63     /*
  64      * basic unescaped named sequences: &amp; etcetera, lodash does not support a lot, but that is not a problem for our case.
  65      * Actually handling all escaped sequences would mean keeping track of a very large and ever growing amount of named sequences
  66      */
  67     const namedUnescaped = unescape(str);
  68     // lodash doesn't unescape &#160; or &#xA0; sequences, we have to do this manually:
  69     const decUnescaped = namedUnescaped.replace(UNESCAPE_HTML_DEC_REGEX, handleEscape(10));
  70     const hexUnescaped = decUnescaped.replace(UNESCAPE_HTML_HEX_REGEX, handleEscape(16));
  71     // unescape css backslash sequences
  72     const strUnescapedHex = hexUnescaped.replace(UNESCAPE_CSS_ESCAPES_REGEX, handleEscape(16));
  73
  74     return strUnescapedHex.replace(OTHER_ESC, (_, char) => char);
  75 };
  76
  77 /**
  78  * Input can be escaped multiple times to escape replacement while still works
  79  * Best solution I found is to escape recursively
  80  * This is done 5 times maximum. If there are too much escape, we consider the string
  81  * "invalid" and we prefer to return an empty string
  82  * @argument str style to unescape
  83  * @augments stop extra security to prevent infinite loop
  84  */
  85 export const recurringUnescapeCSSEncoding = (str: string, stop = 5): string => {
  86     const escaped = unescapeCSSEncoding(str);
  87     if (escaped === str) {
  88         return escaped;
  89     } else if (stop === 0) {
  90         return '';
  91     } else {
  92         return recurringUnescapeCSSEncoding(escaped, stop - 1);
  93     }
  94 };
  95
  96 /**
  97  * Escape some WTF from the CSSParser, cf spec files
  98  * @param  {String} style
  99  * @return {String}
 100  */
 101 export const escapeURLinStyle = (style: string) => {
 102     // handle the case where the value is html encoded, e.g.:
 103     // background:&#117;rl(&quot;https://i.imgur.com/WScAnHr.jpg&quot;)
 104
 105     const unescapedEncoding = recurringUnescapeCSSEncoding(style);
 106
 107     // If we cancelled the unescape encoding step because it was too long, we are returning an empty string.
 108     // In that case we also need to return an empty string in this function, otherwise we will not escape correctly the content
 109     if (unescapedEncoding === '') {
 110         return '';
 111     }
 112
 113     const escapeFlag = unescapedEncoding !== style;
 114
 115     const escapedStyle = unescapedEncoding.replace(/\\r/g, 'r').replace(REGEXP_URL_ATTR, 'proton-$2(');
 116
 117     if (escapedStyle === unescapedEncoding) {
 118         // nothing escaped: just return input
 119         return style;
 120     }
 121
 122     return escapeFlag ? escape(escapedStyle) : escapedStyle;
 123 };
 124
 125 export const escapeForbiddenStyle = (style: string): string => {
 126     let parsedStyle = style
 127         .replaceAll(REGEXP_POSITION_ABSOLUTE, 'position: relative')
 128         .replaceAll(REGEXP_HEIGHT_PERCENTAGE, (rule, prop) => {
 129             // Replace nothing in this case.
 130             if (['line-height', 'max-height'].includes(prop)) {
 131                 return rule;
 132             }
 133
 134             return `${prop}: unset`;
 135         })
 136         // To replace if we support dark styles in the future.
 137         // Disable the Color-scheme so that the message do not use dark mode, message always being displayed on a white bg today
 138         .replaceAll(REGEXP_MEDIA_DARK_STYLE_2, 'proton-disabled-Color-scheme');
 139
 140     return parsedStyle;
 141 };
 142
 143 const HTML_ENTITIES_TO_REMOVE_CHAR_CODES: number[] = [
 144     9, // Tab : &Tab; - &#x00009; - &#9;
 145     10, // New line : &NewLine; - &#x0000A; - &#10;
 146     173, // Soft hyphen : &shy; - &#x000AD; - &#173;
 147     8203, // Zero width space : &ZeroWidthSpace; - &NegativeVeryThinSpace; - &NegativeThinSpace; - &NegativeMediumSpace; - &NegativeThickSpace; - &#x0200B; - &#8203;
 148 ];
 149
 150 /**
 151  * Remove completely some HTML entities from a string
 152  * @param {String} string
 153  * @return {String}
 154  */
 155 export const unescapeFromString = (string: string) => {
 156     const toRemove = HTML_ENTITIES_TO_REMOVE_CHAR_CODES.map((charCode) => String.fromCharCode(charCode));
 157     const regex = new RegExp(toRemove.join('|'), 'g');
 158
 159     return string.replace(regex, '');
 160 };