Merge branch 'feat/inda-383-daily-stat' into 'main'
[ProtonMail-WebClient.git] / packages / docs-shared / lib / WordCount / createWordCountInfo.ts
blob b91126794d6b97a1aaa65fa0243683a2cd804629
1 import {
2   isCJK,
3   isKana,
4   isThai,
5   isWhitespace,
6   isWordCountSupported,
7   isWordDelimiter,
8   toCodePoint,
9 } from './CharacterUtils'
10 import type { WordCountInfo } from './WordCountTypes'
/**
 * Module-level grapheme segmenter, constructed once with the 'en' locale.
 * Left `undefined` when `isWordCountSupported` is falsy, so consumers must
 * check it before use.
 */
const segmenter = !isWordCountSupported
  ? undefined
  : new Intl.Segmenter('en', { granularity: 'grapheme' })
14 export const createWordCountInfo = (textContent: string): WordCountInfo => {
15   let characterCount = 0
16   let wordCount = 0
17   let nonWhitespaceCharacterCount = 0
19   if (!segmenter) {
20     return { characterCount, wordCount, nonWhitespaceCharacterCount }
21   }
23   let alreadyMatched = false
24   for (const segmentData of segmenter.segment(textContent)) {
25     const char = segmentData.segment
26     const charCodePoint = toCodePoint(char)
28     const charIsWhitespace = isWhitespace(char)
30     if (!charIsWhitespace) {
31       nonWhitespaceCharacterCount++
32     }
34     characterCount++
36     if (!charCodePoint) {
37       break
38     }
40     const isMatched = !charIsWhitespace && !isWordDelimiter(charCodePoint)
41     const shouldIncrement = !alreadyMatched || isCJK(charCodePoint) || isKana(charCodePoint) || isThai(charCodePoint)
43     if (shouldIncrement && isMatched) {
44       wordCount++
45       alreadyMatched = true
46     } else if (!isMatched) {
47       alreadyMatched = false
48     }
49   }
51   return {
52     characterCount,
53     nonWhitespaceCharacterCount,
54     wordCount,
55   }