nss: upgrade to release 3.73
[LibreOffice.git] / i18npool / source / breakiterator / breakiterator_th.cxx
blob659a50e44e703e08d11200dbdbab16d9206980d0
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
21 #include <com/sun/star/i18n/CharacterIteratorMode.hpp>
22 #include <o3tl/safeint.hxx>
23 #include <breakiterator_th.hxx>
24 #include <wtt.h>
26 using namespace ::com::sun::star;
27 using namespace ::com::sun::star::uno;
28 using namespace ::com::sun::star::i18n;
29 using namespace ::com::sun::star::lang;
31 namespace i18npool {
33 /**
34 * Constructor.
36 BreakIterator_th::BreakIterator_th() :
37 cachedText()
39 cBreakIterator = "com.sun.star.i18n.BreakIterator_th";
40 // to improve performance, alloc big enough memory in construct.
41 m_aNextCellIndex.assign(512, 0);
42 m_aPreviousCellIndex.assign(512, 0);
43 lineRule=nullptr;
46 /**
47 * Deconstructor.
49 BreakIterator_th::~BreakIterator_th()
53 sal_Int32 SAL_CALL BreakIterator_th::previousCharacters( const OUString& Text,
54 sal_Int32 nStartPos, const lang::Locale& rLocale,
55 sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
57 if (nCharacterIteratorMode == CharacterIteratorMode::SKIPCELL ) {
58 nDone = 0;
59 if (nStartPos > 0) { // for others to skip cell.
60 makeIndex(Text, nStartPos);
62 if (m_aNextCellIndex[nStartPos-1] == 0) // not a CTL character
63 return BreakIterator_Unicode::previousCharacters(Text, nStartPos, rLocale,
64 nCharacterIteratorMode, nCount, nDone);
65 else while (nCount > 0 && m_aNextCellIndex[nStartPos - 1] > 0) {
66 nCount--; nDone++;
67 nStartPos = m_aPreviousCellIndex[nStartPos - 1];
69 } else
70 nStartPos = 0;
71 } else { // for BS to delete one char.
72 for (nDone = 0; nDone < nCount && nStartPos > 0; nDone++)
73 Text.iterateCodePoints(&nStartPos, -1);
76 return nStartPos;
79 sal_Int32 SAL_CALL BreakIterator_th::nextCharacters(const OUString& Text,
80 sal_Int32 nStartPos, const lang::Locale& rLocale,
81 sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone)
83 sal_Int32 len = Text.getLength();
84 if (nCharacterIteratorMode == CharacterIteratorMode::SKIPCELL ) {
85 nDone = 0;
86 if (nStartPos < len) {
87 makeIndex(Text, nStartPos);
89 if (m_aNextCellIndex[nStartPos] == 0) // not a CTL character
90 return BreakIterator_Unicode::nextCharacters(Text, nStartPos, rLocale,
91 nCharacterIteratorMode, nCount, nDone);
92 else while (nCount > 0 && m_aNextCellIndex[nStartPos] > 0) {
93 nCount--; nDone++;
94 nStartPos = m_aNextCellIndex[nStartPos];
96 } else
97 nStartPos = len;
98 } else {
99 for (nDone = 0; nDone < nCount && nStartPos < Text.getLength(); nDone++)
100 Text.iterateCodePoints(&nStartPos);
103 return nStartPos;
106 // Make sure line is broken on cell boundary if we implement cell iterator.
107 LineBreakResults SAL_CALL BreakIterator_th::getLineBreak(
108 const OUString& Text, sal_Int32 nStartPos,
109 const lang::Locale& rLocale, sal_Int32 nMinBreakPos,
110 const LineBreakHyphenationOptions& hOptions,
111 const LineBreakUserOptions& bOptions )
113 LineBreakResults lbr = BreakIterator_Unicode::getLineBreak(Text, nStartPos,
114 rLocale, nMinBreakPos, hOptions, bOptions );
115 if (lbr.breakIndex < Text.getLength()) {
116 makeIndex(Text, lbr.breakIndex);
117 lbr.breakIndex = m_aPreviousCellIndex[ lbr.breakIndex ];
119 return lbr;
122 #define SARA_AM 0x0E33
125 * cell composition states
128 #define ST_COM 1 // Compose the following character with leading char and display in the same cell
129 #define ST_NXT 2 // display the following character in the next cell
130 #define ST_NDP 3 // non-display
132 const sal_Int16 thaiCompRel[MAX_CT][MAX_CT] = {
133 // C N C L F F F B B B T A A A A A A
134 // T O O V V V V V V D O D D D V V V
135 // R N N 1 2 3 1 2 N 1 2 3 1 2 3
136 // L S E
137 // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
138 { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // CTRL 0
139 { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // NON 1
140 { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_COM, ST_COM, ST_COM, ST_COM, ST_COM, ST_COM, ST_COM, ST_COM, ST_COM, ST_COM }, // CONS 2
141 { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // LV 3
142 { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // FV1 4
143 { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // FV2 5
144 { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // FV3 6
145 { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_COM, ST_COM, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // BV1 7
146 { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_COM, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // BV2 8
147 { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // BD 9
148 { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // TONE 10
149 { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // AD1 11
150 { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // AD2 12
151 { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // AD3 13
152 { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_COM, ST_COM, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // AV1 14
153 { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_COM, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT }, // AV2 15
154 { ST_NDP, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_NXT, ST_COM, ST_NXT, ST_COM, ST_NXT, ST_NXT, ST_NXT, ST_NXT } // AV3 16
158 const sal_uInt32 is_ST_COM = (1<<CT_CTRL)|(1<<CT_NON)|(1<<CT_CONS)|(1<<CT_TONE);
160 static sal_uInt16 getCombState(const sal_Unicode *text, sal_Int32 pos)
162 sal_uInt16 ch1 = getCharType(text[pos]);
163 sal_uInt16 ch2 = getCharType(text[pos+1]);
165 if (text[pos+1] == SARA_AM) {
166 if ((1 << ch1) & is_ST_COM)
167 return ST_COM;
168 else
169 ch2 = CT_AD1;
172 return thaiCompRel[ch1][ch2];
176 static sal_Int32 getACell(const sal_Unicode *text, sal_Int32 pos, sal_Int32 len)
178 sal_uInt32 curr = 1;
179 for (; pos + 1 < len && getCombState(text, pos) == ST_COM; curr++, pos++) {}
180 return curr;
183 #define is_Thai(c) (0x0e00 <= c && c <= 0x0e7f) // Unicode definition for Thai
185 void BreakIterator_th::makeIndex(const OUString& Text, sal_Int32 const nStartPos)
187 if (Text != cachedText) {
188 cachedText = Text;
189 if (m_aNextCellIndex.size() < o3tl::make_unsigned(cachedText.getLength())) {
190 m_aNextCellIndex.resize(cachedText.getLength());
191 m_aPreviousCellIndex.resize(cachedText.getLength());
193 // reset nextCell for new Text
194 m_aNextCellIndex.assign(cachedText.getLength(), 0);
196 else if (nStartPos >= Text.getLength() || m_aNextCellIndex[nStartPos] > 0
197 || !is_Thai(Text[nStartPos]))
198 return;
200 const sal_Unicode* str = cachedText.getStr();
201 sal_Int32 const len = cachedText.getLength();
203 sal_Int32 startPos = nStartPos;
204 while (startPos > 0 && is_Thai(str[startPos-1])) startPos--;
205 sal_Int32 endPos = nStartPos;
206 while (endPos < len && is_Thai(str[endPos])) endPos++;
208 sal_Int32 start, end, pos;
209 pos = start = end = startPos;
211 assert(endPos >= 0 && o3tl::make_unsigned(endPos) <= m_aNextCellIndex.size());
212 while (pos < endPos) {
213 end += getACell(str, start, endPos);
214 assert(end >= 0 && o3tl::make_unsigned(end) <= m_aNextCellIndex.size());
215 while (pos < end) {
216 m_aNextCellIndex[pos] = end;
217 m_aPreviousCellIndex[pos] = start;
218 pos++;
220 start = end;
226 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */