1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
21 #include <com/sun/star/i18n/CharacterIteratorMode.hpp>
22 #include <o3tl/safeint.hxx>
23 #include <breakiterator_th.hxx>
26 using namespace ::com::sun::star
;
27 using namespace ::com::sun::star::uno
;
28 using namespace ::com::sun::star::i18n
;
29 using namespace ::com::sun::star::lang
;
// Constructor: sets cBreakIterator to "com.sun.star.i18n.BreakIterator_th" and
// fills both cell index vectors with 512 zero entries.
// NOTE(review): the line(s) following the ':' (member initializer list) and the
// braces of this definition are not visible in this listing -- TODO confirm
// against the real file before touching this block.
36 BreakIterator_th::BreakIterator_th() :
39 cBreakIterator
= "com.sun.star.i18n.BreakIterator_th";
40 // To improve performance, allocate big enough index vectors in the constructor.
41 m_aNextCellIndex
.assign(512, 0);
42 m_aPreviousCellIndex
.assign(512, 0);
// Destructor.
// NOTE(review): the body of this definition is not visible in this listing --
// TODO confirm against the real file.
49 BreakIterator_th::~BreakIterator_th()
53 sal_Int32 SAL_CALL
BreakIterator_th::previousCharacters( const OUString
& Text
,
54 sal_Int32 nStartPos
, const lang::Locale
& rLocale
,
55 sal_Int16 nCharacterIteratorMode
, sal_Int32 nCount
, sal_Int32
& nDone
)
57 if (nCharacterIteratorMode
== CharacterIteratorMode::SKIPCELL
) {
59 if (nStartPos
> 0) { // for others to skip cell.
60 makeIndex(Text
, nStartPos
);
62 if (m_aNextCellIndex
[nStartPos
-1] == 0) // not a CTL character
63 return BreakIterator_Unicode::previousCharacters(Text
, nStartPos
, rLocale
,
64 nCharacterIteratorMode
, nCount
, nDone
);
65 else while (nCount
> 0 && m_aNextCellIndex
[nStartPos
- 1] > 0) {
67 nStartPos
= m_aPreviousCellIndex
[nStartPos
- 1];
71 } else { // for BS to delete one char.
72 for (nDone
= 0; nDone
< nCount
&& nStartPos
> 0; nDone
++)
73 Text
.iterateCodePoints(&nStartPos
, -1);
79 sal_Int32 SAL_CALL
BreakIterator_th::nextCharacters(const OUString
& Text
,
80 sal_Int32 nStartPos
, const lang::Locale
& rLocale
,
81 sal_Int16 nCharacterIteratorMode
, sal_Int32 nCount
, sal_Int32
& nDone
)
83 sal_Int32 len
= Text
.getLength();
84 if (nCharacterIteratorMode
== CharacterIteratorMode::SKIPCELL
) {
86 if (nStartPos
< len
) {
87 makeIndex(Text
, nStartPos
);
89 if (m_aNextCellIndex
[nStartPos
] == 0) // not a CTL character
90 return BreakIterator_Unicode::nextCharacters(Text
, nStartPos
, rLocale
,
91 nCharacterIteratorMode
, nCount
, nDone
);
92 else while (nCount
> 0 && m_aNextCellIndex
[nStartPos
] > 0) {
94 nStartPos
= m_aNextCellIndex
[nStartPos
];
99 for (nDone
= 0; nDone
< nCount
&& nStartPos
< Text
.getLength(); nDone
++)
100 Text
.iterateCodePoints(&nStartPos
);
106 // Make sure line is broken on cell boundary if we implement cell iterator.
107 LineBreakResults SAL_CALL
BreakIterator_th::getLineBreak(
108 const OUString
& Text
, sal_Int32 nStartPos
,
109 const lang::Locale
& rLocale
, sal_Int32 nMinBreakPos
,
110 const LineBreakHyphenationOptions
& hOptions
,
111 const LineBreakUserOptions
& bOptions
)
113 LineBreakResults lbr
= BreakIterator_Unicode::getLineBreak(Text
, nStartPos
,
114 rLocale
, nMinBreakPos
, hOptions
, bOptions
);
115 if (lbr
.breakIndex
< Text
.getLength()) {
116 makeIndex(Text
, lbr
.breakIndex
);
117 lbr
.breakIndex
= m_aPreviousCellIndex
[ lbr
.breakIndex
];
// THAI CHARACTER SARA AM (U+0E33)
#define SARA_AM 0x0E33

/*
 * cell composition states
 */
#define ST_COM  1   // Compose the following character with leading char and display in the same cell
#define ST_NXT  2   // display the following character in the next cell
#define ST_NDP  3   // non-display
132 const sal_Int16 thaiCompRel
[MAX_CT
][MAX_CT
] = {
133 // C N C L F F F B B B T A A A A A A
134 // T O O V V V V V V D O D D D V V V
135 // R N N 1 2 3 1 2 N 1 2 3 1 2 3
137 // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
138 { ST_NDP
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
}, // CTRL 0
139 { ST_NDP
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
}, // NON 1
140 { ST_NDP
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_COM
, ST_COM
, ST_COM
, ST_COM
, ST_COM
, ST_COM
, ST_COM
, ST_COM
, ST_COM
, ST_COM
}, // CONS 2
141 { ST_NDP
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
}, // LV 3
142 { ST_NDP
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
}, // FV1 4
143 { ST_NDP
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
}, // FV2 5
144 { ST_NDP
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
}, // FV3 6
145 { ST_NDP
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_COM
, ST_COM
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
}, // BV1 7
146 { ST_NDP
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_COM
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
}, // BV2 8
147 { ST_NDP
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
}, // BD 9
148 { ST_NDP
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
}, // TONE 10
149 { ST_NDP
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
}, // AD1 11
150 { ST_NDP
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
}, // AD2 12
151 { ST_NDP
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
}, // AD3 13
152 { ST_NDP
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_COM
, ST_COM
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
}, // AV1 14
153 { ST_NDP
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_COM
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
}, // AV2 15
154 { ST_NDP
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
, ST_COM
, ST_NXT
, ST_COM
, ST_NXT
, ST_NXT
, ST_NXT
, ST_NXT
} // AV3 16
158 const sal_uInt32 is_ST_COM
= (1<<CT_CTRL
)|(1<<CT_NON
)|(1<<CT_CONS
)|(1<<CT_TONE
);
160 static sal_uInt16
getCombState(const sal_Unicode
*text
, sal_Int32 pos
)
162 sal_uInt16 ch1
= getCharType(text
[pos
]);
163 sal_uInt16 ch2
= getCharType(text
[pos
+1]);
165 if (text
[pos
+1] == SARA_AM
) {
166 if ((1 << ch1
) & is_ST_COM
)
172 return thaiCompRel
[ch1
][ch2
];
176 static sal_Int32
getACell(const sal_Unicode
*text
, sal_Int32 pos
, sal_Int32 len
)
179 for (; pos
+ 1 < len
&& getCombState(text
, pos
) == ST_COM
; curr
++, pos
++) {}
// Unicode definition for Thai: true when c lies in the block U+0E00..U+0E7F.
// The argument is parenthesised so that expressions such as `a ? b : c` or
// `x | y` can be passed safely; note that it is still evaluated twice.
#define is_Thai(c)  (0x0e00 <= (c) && (c) <= 0x0e7f)
185 void BreakIterator_th::makeIndex(const OUString
& Text
, sal_Int32
const nStartPos
)
187 if (Text
!= cachedText
) {
189 if (m_aNextCellIndex
.size() < o3tl::make_unsigned(cachedText
.getLength())) {
190 m_aNextCellIndex
.resize(cachedText
.getLength());
191 m_aPreviousCellIndex
.resize(cachedText
.getLength());
193 // reset nextCell for new Text
194 m_aNextCellIndex
.assign(cachedText
.getLength(), 0);
196 else if (nStartPos
>= Text
.getLength() || m_aNextCellIndex
[nStartPos
] > 0
197 || !is_Thai(Text
[nStartPos
]))
200 const sal_Unicode
* str
= cachedText
.getStr();
201 sal_Int32
const len
= cachedText
.getLength();
203 sal_Int32 startPos
= nStartPos
;
204 while (startPos
> 0 && is_Thai(str
[startPos
-1])) startPos
--;
205 sal_Int32 endPos
= nStartPos
;
206 while (endPos
< len
&& is_Thai(str
[endPos
])) endPos
++;
208 sal_Int32 start
, end
, pos
;
209 pos
= start
= end
= startPos
;
211 assert(endPos
>= 0 && o3tl::make_unsigned(endPos
) <= m_aNextCellIndex
.size());
212 while (pos
< endPos
) {
213 end
+= getACell(str
, start
, endPos
);
214 assert(end
>= 0 && o3tl::make_unsigned(end
) <= m_aNextCellIndex
.size());
216 m_aNextCellIndex
[pos
] = end
;
217 m_aPreviousCellIndex
[pos
] = start
;
226 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */