// Merge branch 'feat/inda-383-daily-stat' into 'main'
// [ProtonMail-WebClient.git] / packages / docs-shared / lib / WordCount / CharacterUtils.ts
// blob 6bbf523ce05a75356894c6ac6ad92a884d29a391
1 import type { CodePointRange } from './WordCountTypes'
2 import * as WordDelimeters from './WordDelimiters'
/**
 * Returns the Unicode code point of the first character of `str`.
 *
 * Uses `codePointAt`, so a leading surrogate pair (e.g. an emoji) yields one
 * value above 0xFFFF instead of a lone UTF-16 code unit.
 *
 * @param str - String whose first code point is wanted.
 * @returns The code point at index 0, or `undefined` when `str` is empty.
 */
export const toCodePoint = (str: string): number | undefined => str.codePointAt(0)
// First code point of every value exported by ./WordDelimiters, collected once at
// module load so each delimiter check is a single Set lookup.
// NOTE(review): assumes every export of ./WordDelimiters is a string — confirm there.
const wordDelimiters = new Set(Object.values(WordDelimeters).map(toCodePoint))
/**
 * Tells whether a code point is one of the known word delimiters.
 *
 * Always returns a real boolean. The check is an explicit `undefined` test rather
 * than a truthiness test, so code point 0 is looked up in the set like any other
 * value instead of being short-circuited as falsy.
 *
 * @param charCode - Code point to test; `undefined` is accepted and never matches.
 * @returns `true` only when `charCode` is defined and present in the delimiter set.
 */
export const isWordDelimiter = (charCode: number | undefined): boolean =>
  charCode !== undefined && wordDelimiters.has(charCode)
/**
 * Tells whether `char` is whitespace.
 *
 * Relies on `String.prototype.trim`: trimming alters the string only when
 * whitespace sits at its start or end, so for a single character this is true
 * exactly when that character is whitespace. An empty string yields `false`.
 *
 * @param char - The character to test.
 * @returns `true` when trimming `char` changes it.
 */
export const isWhitespace = (char: string): boolean => char.trim() !== char
/**
 * Tells whether `codePoint` lies inside an inclusive `[start, end]` range.
 *
 * The range is accepted as a readonly tuple, so both mutable and `as const`
 * tuples can be passed.
 *
 * @param codePoint - Code point to test.
 * @param range - Tuple of the first and last code point of the range (both inclusive).
 * @returns `true` when `start <= codePoint <= end`.
 */
export const isWithinCodePointRange = (codePoint: number, [start, end]: readonly [number, number]): boolean =>
  start <= codePoint && codePoint <= end
// Inclusive code point ranges counted as kana. Kept at module level so the table
// is built once rather than on every call.
const KANA_RANGES: CodePointRange[] = [
  // Hiragana
  [0x3040, 0x309f],
  // Katakana
  [0x30a0, 0x30ff],
]

/**
 * Tells whether a code point belongs to the Hiragana or Katakana block.
 *
 * @param codePoint - Code point to test.
 * @returns `true` when `codePoint` falls inside any of the kana ranges.
 */
export const isKana = (codePoint: number): boolean =>
  KANA_RANGES.some((range) => isWithinCodePointRange(codePoint, range))
// Inclusive code point ranges counted as CJK. Kept at module level so the table
// is built once rather than on every call.
const CJK_RANGES: CodePointRange[] = [
  // CJK Unified Ideographs
  [0x4e00, 0x9fff],
  // CJK Unified Ideographs Extension A
  [0x3400, 0x4dbf],
  // CJK Unified Ideographs Extension B
  [0x20000, 0x2a6df],
  // CJK Unified Ideographs Extension C
  [0x2a700, 0x2b73f],
  // CJK Unified Ideographs Extension D
  [0x2b740, 0x2b81f],
  // CJK Unified Ideographs Extension E
  [0x2b820, 0x2ceaf],
  // CJK Unified Ideographs Extension F
  [0x2ceb0, 0x2ebef],
  // CJK Compatibility Ideographs
  [0xf900, 0xfaff],
  // CJK Compatibility Ideographs Supplement
  [0x2f800, 0x2fa1f],
  // CJK Symbols and Punctuation
  [0x3000, 0x303f],
  // Kana Supplement
  [0x1b000, 0x1b0ff],
  // Kana Extended-A
  [0x1b100, 0x1b12f],
]

/**
 * Tells whether a code point falls in one of the CJK ranges listed in `CJK_RANGES`
 * (unified and compatibility ideographs, CJK symbols and punctuation, and the
 * supplementary kana blocks).
 *
 * @param codePoint - Code point to test.
 * @returns `true` when `codePoint` falls inside any of the listed ranges.
 */
export const isCJK = (codePoint: number): boolean =>
  CJK_RANGES.some((range) => isWithinCodePointRange(codePoint, range))
/**
 * Tells whether a code point lies in the range U+0E00–U+0E7F (inclusive),
 * i.e. the Thai block.
 *
 * @param codePoint - Code point to test.
 * @returns `true` when `codePoint` is within that range.
 */
export const isThai = (codePoint: number): boolean => isWithinCodePointRange(codePoint, [0x0e00, 0x0e7f])
/**
 * `true` when the runtime exposes `Intl.Segmenter`.
 *
 * `Intl` itself is probed with `typeof` first: optional chaining only guards
 * against a null/undefined value, not against an undeclared global, so
 * `Intl?.Segmenter` would still throw a ReferenceError where `Intl` is absent.
 *
 * NOTE(review): presumably the word counter is built on `Intl.Segmenter`; that
 * code is not visible in this file — confirm against the consumer.
 */
export const isWordCountSupported = typeof Intl !== 'undefined' && typeof Intl.Segmenter !== 'undefined'