1 import type { CodePointRange } from './WordCountTypes'
2 import * as WordDelimeters from './WordDelimiters'
/**
 * Returns the Unicode code point of the first character in `str`.
 *
 * @param str - Text whose leading character is inspected.
 * @returns The code point at index 0, or `undefined` when `str` is empty.
 */
export const toCodePoint = (str: string): number | undefined => {
  const firstCodePoint = str.codePointAt(0)
  return firstCodePoint
}
// Set holding the first code point of every value exported by ./WordDelimiters.
const wordDelimiters = new Set(
  Object.values(WordDelimeters).map((delimiter) => toCodePoint(delimiter)),
)
/**
 * Reports whether a code point is one of the known word delimiters.
 *
 * @param charCode - Code point to look up; `undefined` is never a delimiter.
 * @returns `true` only when `charCode` is present in `wordDelimiters`, otherwise `false`.
 */
export const isWordDelimiter = (charCode: number | undefined): boolean =>
  // Compare against `undefined` explicitly: a truthiness check leaks `0`/`undefined`
  // into the return value and can never match code point 0.
  charCode !== undefined && wordDelimiters.has(charCode)
/**
 * Reports whether trimming changes `char`, i.e. whether it starts or ends with whitespace.
 *
 * For a single-character string this means "the character is whitespace"; a longer
 * string with leading or trailing whitespace also yields `true`.
 *
 * @param char - Text to test.
 * @returns `true` when `char.trim()` is shorter than `char`.
 */
export const isWhitespace = (char: string): boolean => {
  const trimmed = char.trim()
  // `trim` only removes characters, so equal lengths imply identical strings.
  return trimmed.length !== char.length
}
/**
 * Checks whether a code point lies inside an inclusive `[start, end]` range.
 *
 * @param codePoint - Code point to test.
 * @param range - Tuple of the first and last code point of the range (both inclusive).
 * @returns `true` when `start <= codePoint <= end`.
 */
export const isWithinCodePointRange = (
  codePoint: number,
  [start, end]: [number, number],
): boolean => {
  const atOrAboveStart = start <= codePoint
  const atOrBelowEnd = codePoint <= end
  return atOrAboveStart && atOrBelowEnd
}
/**
 * Reports whether `codePoint` falls inside any of the ranges listed in `kanaRanges`.
 *
 * NOTE(review): the individual range entries are not visible in this view;
 * presumably they cover the Hiragana/Katakana blocks — confirm against the full file.
 */
15 export const isKana = (codePoint: number) => {
// The range table is declared inside the function, so it is rebuilt on every call.
16 const kanaRanges: CodePointRange[] = [
// True as soon as one range contains the code point.
22 return kanaRanges.some((codePointRange) => isWithinCodePointRange(codePoint, codePointRange))
/**
 * Reports whether `codePoint` falls inside any of the ranges listed in `cjkRanges`.
 *
 * The table entries are labelled below: CJK Unified Ideographs and Extensions A–F,
 * CJK Compatibility Ideographs and their Supplement, and CJK Symbols and Punctuation.
 *
 * NOTE(review): the numeric bounds of each range are not visible in this view —
 * confirm them against the Unicode block definitions in the full file.
 */
25 export const isCJK = (codePoint: number) => {
// The range table is declared inside the function, so it is rebuilt on every call.
26 const cjkRanges: CodePointRange[] = [
27 // CJK Unified Ideographs
29 // CJK Unified Ideographs Extension A
31 // CJK Unified Ideographs Extension B
33 // CJK Unified Ideographs Extension C
35 // CJK Unified Ideographs Extension D
37 // CJK Unified Ideographs Extension E
39 // CJK Unified Ideographs Extension F
41 // CJK Compatibility Ideographs
43 // CJK Compatibility Ideographs Supplement
45 // CJK Symbols and Punctuation
// True as soon as one range contains the code point.
52 return cjkRanges.some((cjkRange) => isWithinCodePointRange(codePoint, cjkRange))
/**
 * Reports whether `codePoint` lies in the inclusive range U+0E00–U+0E7F.
 *
 * @param codePoint - Code point to test.
 * @returns `true` when the code point is within that range.
 */
export const isThai = (codePoint: number): boolean => {
  const thaiRange: [number, number] = [0x0e00, 0x0e7f]
  return isWithinCodePointRange(codePoint, thaiRange)
}
/**
 * Whether the runtime exposes `Intl.Segmenter`, evaluated once when the module loads.
 *
 * `Intl?.Segmenter` only guards against `Intl` being null/undefined; it still throws a
 * ReferenceError when the `Intl` global is not declared at all. `typeof Intl` is safe on
 * an undeclared identifier, so that check runs first.
 */
export const isWordCountSupported =
  typeof Intl !== 'undefined' && typeof Intl.Segmenter !== 'undefined'