packages/docs-shared/lib/WordCount/CharacterUtils.ts

   1 import type { CodePointRange } from './WordCountTypes'
   2 import * as WordDelimeters from './WordDelimiters'
   3
   4 export const toCodePoint = (str: string) => str.codePointAt(0)
   5
   6 const wordDelimiters = new Set(Object.values(WordDelimeters).map(toCodePoint))
   7
   8 export const isWordDelimiter = (charCode: number | undefined) => charCode && wordDelimiters.has(charCode)
   9
  10 export const isWhitespace = (char: string) => char.trim() !== char
  11
  12 export const isWithinCodePointRange = (codePoint: number, [start, end]: [number, number]) =>
  13   codePoint >= start && codePoint <= end
  14
  15 export const isKana = (codePoint: number) => {
  16   const kanaRanges: CodePointRange[] = [
  17     // Hiragana
  18     [0x3040, 0x309f],
  19     // Katakana
  20     [0x30a0, 0x30ff],
  21   ]
  22   return kanaRanges.some((codePointRange) => isWithinCodePointRange(codePoint, codePointRange))
  23 }
  24
  25 export const isCJK = (codePoint: number) => {
  26   const cjkRanges: CodePointRange[] = [
  27     // CJK Unified Ideographs
  28     [0x4e00, 0x9fff],
  29     // CJK Unified Ideographs Extension A
  30     [0x3400, 0x4dbf],
  31     // CJK Unified Ideographs Extension B
  32     [0x20000, 0x2a6df],
  33     // CJK Unified Ideographs Extension C
  34     [0x2a700, 0x2b73f],
  35     // CJK Unified Ideographs Extension D
  36     [0x2b740, 0x2b81f],
  37     // CJK Unified Ideographs Extension E
  38     [0x2b820, 0x2ceaf],
  39     // CJK Unified Ideographs Extension F
  40     [0x2ceb0, 0x2ebef],
  41     // CJK Compatibility Ideographs
  42     [0xf900, 0xfaff],
  43     // CJK Compatibility Ideographs Supplement
  44     [0x2f800, 0x2fa1f],
  45     // CJK Symbols and Punctuation
  46     [0x3000, 0x303f],
  47     // Kana Supplement
  48     [0x1b000, 0x1b0ff],
  49     // Kana Extended-A
  50     [0x1b100, 0x1b12f],
  51   ]
  52   return cjkRanges.some((cjkRange) => isWithinCodePointRange(codePoint, cjkRange))
  53 }
  54
  55 export const isThai = (codePoint: number) => isWithinCodePointRange(codePoint, [0x0e00, 0x0e7f])
  56
  57 export const isWordCountSupported = typeof Intl?.Segmenter !== 'undefined'