1 import { createWordCountInfo } from './createWordCountInfo'
/** Shorthand for tests that only care about the word total: returns `createWordCountInfo(text).wordCount`. */
function countWords(text: string) {
  const { wordCount } = createWordCountInfo(text)
  return wordCount
}
// Baseline case: one plain English sentence ending in a full stop.
test('Simple sentence', () => {
  const info = createWordCountInfo('The quick brown fox jumps over the lazy dog.')

  expect(info.wordCount).toBe(9)
  // 44 characters in total, 8 of them spaces.
  expect(info.characterCount).toBe(44)
  expect(info.nonWhitespaceCharacterCount).toBe(36)
})
// Punctuation used around and between words: '?', '*', '.', ':' and commas with and without a
// following space.
test('Different separators', () => {
  const sample =
    'Is this a question? Here I am *emphasising* this word. Here: are some comma,separated,words and comma, separated, words'
  const counts = createWordCountInfo(sample)

  expect(counts.wordCount).toBe(20)
  expect(counts.characterCount).toBe(119)
  expect(counts.nonWhitespaceCharacterCount).toBe(102)
})
// Four words joined only by em dashes are expected to be counted as four words.
test('Em dashes delimit words', () => {
  const result = createWordCountInfo('Btw—hello—world—foo')

  expect(result.wordCount).toBe(4)
  // The input has no whitespace, so both character totals are the same.
  expect(result.characterCount).toBe(19)
  expect(result.nonWhitespaceCharacterCount).toBe(19)
})
// A currency amount plus '@' and '#' prefixed tokens: 6 space-separated tokens, 7 words expected.
test('Special characters and symbols', () => {
  const stats = createWordCountInfo('The cost is $5.99 @store #bigDeal')

  // Matches other online word counters and Docs products.
  expect(stats.wordCount).toBe(7)
  expect(stats.characterCount).toBe(33)
  expect(stats.nonWhitespaceCharacterCount).toBe(28)
})
// Tokens containing digits ('H2O', 'COVID-19') must stay whole: the sentence has 12
// space-separated tokens and 12 words are expected.
test('Numbers in words', () => {
  const info = createWordCountInfo('The chemical formula for water is H2O and the pandemic is COVID-19')

  expect(info.wordCount).toBe(12)
})
// Accented letters ('é', 'ç') must not break a word apart: 14 tokens in, 14 words expected.
test('Accented Characters and diacritics', () => {
  const sentence = 'Café culture is thriving in the city, with many façades reflecting a historical charm.'

  expect(createWordCountInfo(sentence).wordCount).toBe(14)
})
test('Long text and paragraphs', () => {
  // Simulate a large gap in the text: 1000 extra spaces between the two sentences.
  const gap = ' '.repeat(1000)
  const longText = `This is a long paragraph. ${gap}Another sentence.`

  expect(createWordCountInfo(longText).wordCount).toBe(7)
})
// Runs of punctuation ('...', '?!!!') must not create extra words.
test('Consecutive punctuation marks', () => {
  const info = createWordCountInfo('Wait... what?!!! This is incredible...')

  expect(info.wordCount).toBe(5)
})
// The sentence has 11 space-separated tokens but 15 words are expected, so the URL and the email
// address each count as more than one word.
test('URLs and email addresses', () => {
  const sentence = 'Please visit our site at https://example.com or contact us at info@example.com'
  const total = createWordCountInfo(sentence).wordCount

  // Matches other online word counters and Docs products.
  expect(total).toBe(15)
})
// 9 space-separated tokens but 11 words expected: the dotted abbreviation contributes more than
// one word.
test('Abbreviations and acronyms', () => {
  const info = createWordCountInfo('I live in the U.S.A. and work for NASA.')

  // Matches other online word counters and Docs products.
  expect(info.wordCount).toBe(11)
})
// Counterpart of the em-dash test: the same words joined by hyphens count as a single word.
test('Hyphens do not delimit words', () => {
  const hyphenated = 'Btw-hello-world-foo'
  const {
    wordCount: words,
    characterCount: chars,
    nonWhitespaceCharacterCount: visibleChars,
  } = createWordCountInfo(hyphenated)

  expect(words).toBe(1)
  expect(chars).toBe(19)
  expect(visibleChars).toBe(19)
})
// Words separated by a mix of spaces, a newline (\n) and a tab (\t).
// NOTE(review): as written here the literal contains 8 whitespace characters. Going by the other
// tests in this file (characterCount = non-whitespace + whitespace), that would give 43, not the
// asserted 54. The asserted values imply 19 whitespace characters, so the literal presumably had
// runs of consecutive spaces that were collapsed. Confirm, then restore them or correct the
// expected value.
99 test('Whitespace', () => {
100 const text = 'The quick brown fox\njumps\tover the lazy dog'
101 const { wordCount, characterCount, nonWhitespaceCharacterCount } = createWordCountInfo(text)
103 expect(wordCount).toBe(9)
104 expect(characterCount).toBe(54)
105 expect(nonWhitespaceCharacterCount).toBe(35)
// Whitespace before the first word and after the last one must not produce extra words.
test('Leading and trailing whitespace', () => {
  const padded = ' The quick brown fox jumps over the lazy dog '
  const { wordCount } = createWordCountInfo(padded)

  expect(wordCount).toBe(9)
})
// Words separated only by non-breaking spaces (U+00A0). The 8 separators still split the words
// and are left out of nonWhitespaceCharacterCount (43 - 8 = 35).
test('The\u00a0quick\u00a0brown\u00a0fox\u00a0jumps\u00a0over\u00a0the\u00a0lazy\u00a0dog', () => {
  const info = createWordCountInfo(
    'The\u00a0quick\u00a0brown\u00a0fox\u00a0jumps\u00a0over\u00a0the\u00a0lazy\u00a0dog',
  )

  expect(info.wordCount).toBe(9)
  expect(info.characterCount).toBe(43)
  expect(info.nonWhitespaceCharacterCount).toBe(35)
})
// An apostrophe inside a word is part of that word: three contractions, three words.
test('contractions with apostrophe should not count as two separate words', () => {
  const info = createWordCountInfo("shouldn't couldn't wouldn't")

  expect(info.wordCount).toBe(3)
  // 27 characters including the two separating spaces.
  expect(info.characterCount).toBe(27)
  expect(info.nonWhitespaceCharacterCount).toBe(25)
})
// Spanish: inverted question mark, accented letters and ordinary punctuation.
test('Spanish', () => {
  const sentence = '¿Qué opinas de las nuevas reformas? Me gustan los cambios, pero a veces son complicados.'

  // The call on its own verifies nothing, so the result is asserted explicitly.
  // NOTE(review): 15 is a hand count of the space-separated words in the sentence (punctuation
  // ignored, as in the other language tests) — confirm against the implementation.
  expect(countWords(sentence)).toBe(15)
})
// Arabic: five space-separated words; the trailing punctuation must not add to the count.
test('Arabic sentence', () => {
  const wordCount = countWords('أنا أحب تعلم اللغات! وأنت؟')

  expect(wordCount).toBe(5)
})
// Korean: six space-separated words; '!' and '?' are attached to words and not counted.
test('Korean sentence', () => {
  const wordCount = countWords('나는 언어를 배우는 것을 좋아해요! 당신은요?')

  expect(wordCount).toBe(6)
})
// Hindi (Devanagari): eight space-separated words; the sentence-ending marks are not counted.
test('Hindi sentence', () => {
  const wordCount = countWords('मुझे भाषाएँ सीखना पसंद है। आप कैसे हैं?')

  expect(wordCount).toBe(8)
})
// Portuguese: eight words, including accented ones and a single-letter word.
test('Portuguese sentence', () => {
  const wordCount = countWords('Eu gosto de aprender línguas. E você, gosta?')

  expect(wordCount).toBe(8)
})
// Bengali: eight space-separated words; the sentence-ending marks are not counted.
test('Bengali sentence', () => {
  const wordCount = countWords('আমি ভাষা শেখা পছন্দ করি। আপনি কেমন আছেন?')

  expect(wordCount).toBe(8)
})
// Russian (Cyrillic): six words, one of them a single letter.
test('Russian sentence', () => {
  const wordCount = countWords('Мне нравится учить языки. А вам?')

  expect(wordCount).toBe(6)
})
// French: the elided form with a typographic apostrophe is one word, and the '!' and '?' that
// stand apart from the preceding word are not words. Six words expected.
test('French sentence', () => {
  const wordCount = countWords('J’aime apprendre des langues ! Et toi ?')

  expect(wordCount).toBe(6)
})
// German: six space-separated words.
test('German sentence', () => {
  const wordCount = countWords('Ich mag Sprachen lernen. Und du?')

  expect(wordCount).toBe(6)
})
// Vietnamese: nine space-separated syllables carrying diacritics, each counted as a word.
test('Vietnamese sentence', () => {
  const wordCount = countWords('Tôi thích học các ngôn ngữ! Bạn thì sao?')

  expect(wordCount).toBe(9)
})
// Urdu: eight space-separated words; the sentence-ending marks are not counted.
test('Urdu sentence', () => {
  const wordCount = countWords('مجھے زبانیں سیکھنا پسند ہے۔ آپ کیسے ہیں؟')

  expect(wordCount).toBe(8)
})
// Turkish. Expected words: "Dilleri", "öğrenmeyi", "seviyorum", "Sen", "ne", "düşünüyorsun".
test('Turkish sentence', () => {
  const wordCount = countWords('Dilleri öğrenmeyi seviyorum! Sen ne düşünüyorsun?')

  expect(wordCount).toBe(6)
})
// Italian: seven space-separated words.
test('Italian sentence', () => {
  const wordCount = countWords('Mi piace imparare le lingue. E tu?')

  expect(wordCount).toBe(7)
})
// Persian: eight space-separated words; the trailing punctuation must not add to the count.
test('Persian (Farsi) sentence', () => {
  const wordCount = countWords('من یادگیری زبانها را دوست دارم! شما چطور؟')

  expect(wordCount).toBe(8)
})
// Polish: six space-separated words with diacritics.
test('Polish sentence', () => {
  const wordCount = countWords('Lubię uczyć się języków. A ty?')

  expect(wordCount).toBe(6)
})
// Tamil: six space-separated words; '!' and '?' are attached to words and not counted.
test('Tamil sentence', () => {
  const wordCount = countWords('நான் மொழிகள் கற்க விரும்புகிறேன்! நீங்கள் எப்படி?')

  expect(wordCount).toBe(6)
})
// Non-Whitespace-Separated Languages
// Chinese is written without spaces. The expected values (9 and 24) equal the number of Han
// characters in each sample, with punctuation left out.
test('Mandarin Chinese sentence', () => {
  const shortSample = '我喜欢学习语言。你呢?'
  const longerSample = '天地玄黄,宇宙洪荒。日月盈昃,辰宿列张。寒来暑往,秋收冬藏。'

  expect(countWords(shortSample)).toBe(9)
  expect(countWords(longerSample)).toBe(24)
})
// Japanese is written without spaces. Each expected value equals the number of kana/kanji
// characters in the sample, with punctuation left out.
// NOTE(review): the second and fourth samples read identically here — confirm whether a
// different input was intended for one of them.
test('Japanese', () => {
  const samples: Array<[string, number]> = [
    ['速い茶色の狐が怠け者の犬の上を飛び越えます。', 21],
    ['しい改革についてどう思いますか?', 15],
    ['変化は好きですが、時々複雑です。', 14],
    ['しい改革についてどう思いますか?', 15],
  ]

  for (const [text, expected] of samples) {
    expect(countWords(text)).toBe(expected)
  }
})
// Thai has no spaces between words inside each run. The expected 20 is far above the two
// space-separated chunks, so the text is counted in much smaller units.
test('Thai sentence', () => {
  const wordCount = countWords('ฉันชอบเรียนภาษาไทย! คุณล่ะ?')

  expect(wordCount).toBe(20)
})
// Emoji handling, including multi-code-point sequences (the keycap and the astronaut).
test('Emojis including emoji sequences', () => {
  // The astronaut is man + zero-width joiner (U+200D) + rocket. The joiner is written as an
  // escape because it is invisible in source and easy to lose when the literal is edited or
  // copied. The runtime string is unchanged as long as the joiner was there to begin with.
  // NOTE(review): the expected totals below (97 / 77) only add up if that pair is one joined
  // sequence counted as a single character — confirm the original literal contains the joiner.
  const text =
    'The quick brown fox jumps over the lazy dog 🦊. Look at this cool rocket and astronaut 1️⃣ 🚀👨\u200D🚀! 🌈✨🚀 😎'

  const info = createWordCountInfo(text)

  expect(info.wordCount).toBe(21)
  expect(info.characterCount).toBe(97)
  // 20 of the 97 characters are spaces.
  expect(info.nonWhitespaceCharacterCount).toBe(77)
})