/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
#include <transliteration_Ignore.hxx>

#include <numeric>

using namespace com::sun::star::uno;
28 const sal_Unicode table_normalwidth
[] = {
30 0x3041, // 0x3041 HIRAGANA LETTER SMALL A
31 0x3042, // 0x3042 HIRAGANA LETTER A
32 0x3043, // 0x3043 HIRAGANA LETTER SMALL I
33 0x3044, // 0x3044 HIRAGANA LETTER I
34 0x3045, // 0x3045 HIRAGANA LETTER SMALL U
35 0x3046, // 0x3046 HIRAGANA LETTER U
36 0x3047, // 0x3047 HIRAGANA LETTER SMALL E
37 0x3048, // 0x3048 HIRAGANA LETTER E
38 0x3049, // 0x3049 HIRAGANA LETTER SMALL O
39 0x304a, // 0x304a HIRAGANA LETTER O
40 0x3042, // 0x304b HIRAGANA LETTER KA
41 0x3042, // 0x304c HIRAGANA LETTER GA
42 0x3044, // 0x304d HIRAGANA LETTER KI
43 0x3044, // 0x304e HIRAGANA LETTER GI
44 0x3046, // 0x304f HIRAGANA LETTER KU
45 0x3046, // 0x3050 HIRAGANA LETTER GU
46 0x3048, // 0x3051 HIRAGANA LETTER KE
47 0x3048, // 0x3052 HIRAGANA LETTER GE
48 0x304a, // 0x3053 HIRAGANA LETTER KO
49 0x304a, // 0x3054 HIRAGANA LETTER GO
50 0x3042, // 0x3055 HIRAGANA LETTER SA
51 0x3042, // 0x3056 HIRAGANA LETTER ZA
52 0x3044, // 0x3057 HIRAGANA LETTER SI
53 0x3044, // 0x3058 HIRAGANA LETTER ZI
54 0x3046, // 0x3059 HIRAGANA LETTER SU
55 0x3046, // 0x305a HIRAGANA LETTER ZU
56 0x3048, // 0x305b HIRAGANA LETTER SE
57 0x3048, // 0x305c HIRAGANA LETTER ZE
58 0x304a, // 0x305d HIRAGANA LETTER SO
59 0x304a, // 0x305e HIRAGANA LETTER ZO
60 0x3042, // 0x305f HIRAGANA LETTER TA
61 0x3042, // 0x3060 HIRAGANA LETTER DA
62 0x3044, // 0x3061 HIRAGANA LETTER TI
63 0x3044, // 0x3062 HIRAGANA LETTER DI
64 0x3045, // 0x3063 HIRAGANA LETTER SMALL TU
65 0x3046, // 0x3064 HIRAGANA LETTER TU
66 0x3046, // 0x3065 HIRAGANA LETTER DU
67 0x3048, // 0x3066 HIRAGANA LETTER TE
68 0x3048, // 0x3067 HIRAGANA LETTER DE
69 0x304a, // 0x3068 HIRAGANA LETTER TO
70 0x304a, // 0x3069 HIRAGANA LETTER DO
71 0x3042, // 0x306a HIRAGANA LETTER NA
72 0x3044, // 0x306b HIRAGANA LETTER NI
73 0x3046, // 0x306c HIRAGANA LETTER NU
74 0x3048, // 0x306d HIRAGANA LETTER NE
75 0x304a, // 0x306e HIRAGANA LETTER NO
76 0x3042, // 0x306f HIRAGANA LETTER HA
77 0x3042, // 0x3070 HIRAGANA LETTER BA
78 0x3042, // 0x3071 HIRAGANA LETTER PA
79 0x3044, // 0x3072 HIRAGANA LETTER HI
80 0x3044, // 0x3073 HIRAGANA LETTER BI
81 0x3044, // 0x3074 HIRAGANA LETTER PI
82 0x3046, // 0x3075 HIRAGANA LETTER HU
83 0x3046, // 0x3076 HIRAGANA LETTER BU
84 0x3046, // 0x3077 HIRAGANA LETTER PU
85 0x3048, // 0x3078 HIRAGANA LETTER HE
86 0x3048, // 0x3079 HIRAGANA LETTER BE
87 0x3048, // 0x307a HIRAGANA LETTER PE
88 0x304a, // 0x307b HIRAGANA LETTER HO
89 0x304a, // 0x307c HIRAGANA LETTER BO
90 0x304a, // 0x307d HIRAGANA LETTER PO
91 0x3042, // 0x307e HIRAGANA LETTER MA
92 0x3044, // 0x307f HIRAGANA LETTER MI
93 0x3046, // 0x3080 HIRAGANA LETTER MU
94 0x3048, // 0x3081 HIRAGANA LETTER ME
95 0x304a, // 0x3082 HIRAGANA LETTER MO
96 0x3041, // 0x3083 HIRAGANA LETTER SMALL YA
97 0x3042, // 0x3084 HIRAGANA LETTER YA
98 0x3045, // 0x3085 HIRAGANA LETTER SMALL YU
99 0x3046, // 0x3086 HIRAGANA LETTER YU
100 0x3049, // 0x3087 HIRAGANA LETTER SMALL YO
101 0x304a, // 0x3088 HIRAGANA LETTER YO
102 0x3042, // 0x3089 HIRAGANA LETTER RA
103 0x3044, // 0x308a HIRAGANA LETTER RI
104 0x3046, // 0x308b HIRAGANA LETTER RU
105 0x3048, // 0x308c HIRAGANA LETTER RE
106 0x304a, // 0x308d HIRAGANA LETTER RO
107 0x3041, // 0x308e HIRAGANA LETTER SMALL WA
108 0x3042, // 0x308f HIRAGANA LETTER WA
109 0x3044, // 0x3090 HIRAGANA LETTER WI
110 0x3048, // 0x3091 HIRAGANA LETTER WE
111 0x304a, // 0x3092 HIRAGANA LETTER WO
112 0x0000, // 0x3093 HIRAGANA LETTER N
113 0x3046, // 0x3094 HIRAGANA LETTER VU
118 0x0000, // 0x3099 COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
119 0x0000, // 0x309a COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
120 0x0000, // 0x309b KATAKANA-HIRAGANA VOICED SOUND MARK
121 0x0000, // 0x309c KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
122 0x0000, // 0x309d HIRAGANA ITERATION MARK
123 0x0000, // 0x309e HIRAGANA VOICED ITERATION MARK
126 0x30a1, // 0x30a1 KATAKANA LETTER SMALL A
127 0x30a2, // 0x30a2 KATAKANA LETTER A
128 0x30a3, // 0x30a3 KATAKANA LETTER SMALL I
129 0x30a4, // 0x30a4 KATAKANA LETTER I
130 0x30a5, // 0x30a5 KATAKANA LETTER SMALL U
131 0x30a6, // 0x30a6 KATAKANA LETTER U
132 0x30a7, // 0x30a7 KATAKANA LETTER SMALL E
133 0x30a8, // 0x30a8 KATAKANA LETTER E
134 0x30a9, // 0x30a9 KATAKANA LETTER SMALL O
135 0x30aa, // 0x30aa KATAKANA LETTER O
136 0x30a2, // 0x30ab KATAKANA LETTER KA
137 0x30a2, // 0x30ac KATAKANA LETTER GA
138 0x30a4, // 0x30ad KATAKANA LETTER KI
139 0x30a4, // 0x30ae KATAKANA LETTER GI
140 0x30a6, // 0x30af KATAKANA LETTER KU
141 0x30a6, // 0x30b0 KATAKANA LETTER GU
142 0x30a8, // 0x30b1 KATAKANA LETTER KE
143 0x30a8, // 0x30b2 KATAKANA LETTER GE
144 0x30aa, // 0x30b3 KATAKANA LETTER KO
145 0x30aa, // 0x30b4 KATAKANA LETTER GO
146 0x30a2, // 0x30b5 KATAKANA LETTER SA
147 0x30a2, // 0x30b6 KATAKANA LETTER ZA
148 0x30a4, // 0x30b7 KATAKANA LETTER SI
149 0x30a4, // 0x30b8 KATAKANA LETTER ZI
150 0x30a6, // 0x30b9 KATAKANA LETTER SU
151 0x30a6, // 0x30ba KATAKANA LETTER ZU
152 0x30a8, // 0x30bb KATAKANA LETTER SE
153 0x30a8, // 0x30bc KATAKANA LETTER ZE
154 0x30aa, // 0x30bd KATAKANA LETTER SO
155 0x30aa, // 0x30be KATAKANA LETTER ZO
156 0x30a2, // 0x30bf KATAKANA LETTER TA
157 0x30a2, // 0x30c0 KATAKANA LETTER DA
158 0x30a4, // 0x30c1 KATAKANA LETTER TI
159 0x30a4, // 0x30c2 KATAKANA LETTER DI
160 0x30a5, // 0x30c3 KATAKANA LETTER SMALL TU
161 0x30a6, // 0x30c4 KATAKANA LETTER TU
162 0x30a6, // 0x30c5 KATAKANA LETTER DU
163 0x30a8, // 0x30c6 KATAKANA LETTER TE
164 0x30a8, // 0x30c7 KATAKANA LETTER DE
165 0x30aa, // 0x30c8 KATAKANA LETTER TO
166 0x30aa, // 0x30c9 KATAKANA LETTER DO
167 0x30a2, // 0x30ca KATAKANA LETTER NA
168 0x30a4, // 0x30cb KATAKANA LETTER NI
169 0x30a6, // 0x30cc KATAKANA LETTER NU
170 0x30a8, // 0x30cd KATAKANA LETTER NE
171 0x30aa, // 0x30ce KATAKANA LETTER NO
172 0x30a2, // 0x30cf KATAKANA LETTER HA
173 0x30a2, // 0x30d0 KATAKANA LETTER BA
174 0x30a2, // 0x30d1 KATAKANA LETTER PA
175 0x30a4, // 0x30d2 KATAKANA LETTER HI
176 0x30a4, // 0x30d3 KATAKANA LETTER BI
177 0x30a4, // 0x30d4 KATAKANA LETTER PI
178 0x30a6, // 0x30d5 KATAKANA LETTER HU
179 0x30a6, // 0x30d6 KATAKANA LETTER BU
180 0x30a6, // 0x30d7 KATAKANA LETTER PU
181 0x30a8, // 0x30d8 KATAKANA LETTER HE
182 0x30a8, // 0x30d9 KATAKANA LETTER BE
183 0x30a8, // 0x30da KATAKANA LETTER PE
184 0x30aa, // 0x30db KATAKANA LETTER HO
185 0x30aa, // 0x30dc KATAKANA LETTER BO
186 0x30aa, // 0x30dd KATAKANA LETTER PO
187 0x30a2, // 0x30de KATAKANA LETTER MA
188 0x30a4, // 0x30df KATAKANA LETTER MI
189 0x30a6, // 0x30e0 KATAKANA LETTER MU
190 0x30a8, // 0x30e1 KATAKANA LETTER ME
191 0x30aa, // 0x30e2 KATAKANA LETTER MO
192 0x30a1, // 0x30e3 KATAKANA LETTER SMALL YA
193 0x30a2, // 0x30e4 KATAKANA LETTER YA
194 0x30a5, // 0x30e5 KATAKANA LETTER SMALL YU
195 0x30a6, // 0x30e6 KATAKANA LETTER YU
196 0x30a9, // 0x30e7 KATAKANA LETTER SMALL YO
197 0x30aa, // 0x30e8 KATAKANA LETTER YO
198 0x30a2, // 0x30e9 KATAKANA LETTER RA
199 0x30a4, // 0x30ea KATAKANA LETTER RI
200 0x30a6, // 0x30eb KATAKANA LETTER RU
201 0x30a8, // 0x30ec KATAKANA LETTER RE
202 0x30aa, // 0x30ed KATAKANA LETTER RO
203 0x30a1, // 0x30ee KATAKANA LETTER SMALL WA
204 0x30a2, // 0x30ef KATAKANA LETTER WA
205 0x30a4, // 0x30f0 KATAKANA LETTER WI
206 0x30a8, // 0x30f1 KATAKANA LETTER WE
207 0x30aa, // 0x30f2 KATAKANA LETTER WO
208 0x0000, // 0x30f3 KATAKANA LETTER N
209 0x30a6, // 0x30f4 KATAKANA LETTER VU
210 0x30a1, // 0x30f5 KATAKANA LETTER SMALL KA
211 0x30a7, // 0x30f6 KATAKANA LETTER SMALL KE
212 0x30a2, // 0x30f7 KATAKANA LETTER VA
213 0x30a4, // 0x30f8 KATAKANA LETTER VI
214 0x30a8, // 0x30f9 KATAKANA LETTER VE
215 0x30aa // 0x30fa KATAKANA LETTER VO
216 // 0x0000, // 0x30fb KATAKANA MIDDLE DOT
217 // 0x0000, // 0x30fc KATAKANA-HIRAGANA PROLONGED SOUND MARK
218 // 0x0000, // 0x30fd KATAKANA ITERATION MARK
219 // 0x0000, // 0x30fe KATAKANA VOICED ITERATION MARK
223 const sal_Unicode table_halfwidth
[] = {
224 // 0x0000, // 0xff61 HALFWIDTH IDEOGRAPHIC FULL STOP
225 // 0x0000, // 0xff62 HALFWIDTH LEFT CORNER BRACKET
226 // 0x0000, // 0xff63 HALFWIDTH RIGHT CORNER BRACKET
227 // 0x0000, // 0xff64 HALFWIDTH IDEOGRAPHIC COMMA
228 // 0x0000, // 0xff65 HALFWIDTH KATAKANA MIDDLE DOT
229 0xff75, // 0xff66 HALFWIDTH KATAKANA LETTER WO
230 0xff67, // 0xff67 HALFWIDTH KATAKANA LETTER SMALL A
231 0xff68, // 0xff68 HALFWIDTH KATAKANA LETTER SMALL I
232 0xff69, // 0xff69 HALFWIDTH KATAKANA LETTER SMALL U
233 0xff6a, // 0xff6a HALFWIDTH KATAKANA LETTER SMALL E
234 0xff6b, // 0xff6b HALFWIDTH KATAKANA LETTER SMALL O
235 0xff67, // 0xff6c HALFWIDTH KATAKANA LETTER SMALL YA
236 0xff69, // 0xff6d HALFWIDTH KATAKANA LETTER SMALL YU
237 0xff6b, // 0xff6e HALFWIDTH KATAKANA LETTER SMALL YO
238 0xff69, // 0xff6f HALFWIDTH KATAKANA LETTER SMALL TU
239 0x0000, // 0xff70 HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
240 0xff71, // 0xff71 HALFWIDTH KATAKANA LETTER A
241 0xff72, // 0xff72 HALFWIDTH KATAKANA LETTER I
242 0xff73, // 0xff73 HALFWIDTH KATAKANA LETTER U
243 0xff74, // 0xff74 HALFWIDTH KATAKANA LETTER E
244 0xff75, // 0xff75 HALFWIDTH KATAKANA LETTER O
245 0xff71, // 0xff76 HALFWIDTH KATAKANA LETTER KA
246 0xff72, // 0xff77 HALFWIDTH KATAKANA LETTER KI
247 0xff73, // 0xff78 HALFWIDTH KATAKANA LETTER KU
248 0xff74, // 0xff79 HALFWIDTH KATAKANA LETTER KE
249 0xff75, // 0xff7a HALFWIDTH KATAKANA LETTER KO
250 0xff71, // 0xff7b HALFWIDTH KATAKANA LETTER SA
251 0xff72, // 0xff7c HALFWIDTH KATAKANA LETTER SI
252 0xff73, // 0xff7d HALFWIDTH KATAKANA LETTER SU
253 0xff74, // 0xff7e HALFWIDTH KATAKANA LETTER SE
254 0xff75, // 0xff7f HALFWIDTH KATAKANA LETTER SO
255 0xff71, // 0xff80 HALFWIDTH KATAKANA LETTER TA
256 0xff72, // 0xff81 HALFWIDTH KATAKANA LETTER TI
257 0xff73, // 0xff82 HALFWIDTH KATAKANA LETTER TU
258 0xff74, // 0xff83 HALFWIDTH KATAKANA LETTER TE
259 0xff75, // 0xff84 HALFWIDTH KATAKANA LETTER TO
260 0xff71, // 0xff85 HALFWIDTH KATAKANA LETTER NA
261 0xff72, // 0xff86 HALFWIDTH KATAKANA LETTER NI
262 0xff73, // 0xff87 HALFWIDTH KATAKANA LETTER NU
263 0xff74, // 0xff88 HALFWIDTH KATAKANA LETTER NE
264 0xff75, // 0xff89 HALFWIDTH KATAKANA LETTER NO
265 0xff71, // 0xff8a HALFWIDTH KATAKANA LETTER HA
266 0xff72, // 0xff8b HALFWIDTH KATAKANA LETTER HI
267 0xff73, // 0xff8c HALFWIDTH KATAKANA LETTER HU
268 0xff74, // 0xff8d HALFWIDTH KATAKANA LETTER HE
269 0xff75, // 0xff8e HALFWIDTH KATAKANA LETTER HO
270 0xff71, // 0xff8f HALFWIDTH KATAKANA LETTER MA
271 0xff72, // 0xff90 HALFWIDTH KATAKANA LETTER MI
272 0xff73, // 0xff91 HALFWIDTH KATAKANA LETTER MU
273 0xff74, // 0xff92 HALFWIDTH KATAKANA LETTER ME
274 0xff75, // 0xff93 HALFWIDTH KATAKANA LETTER MO
275 0xff71, // 0xff94 HALFWIDTH KATAKANA LETTER YA
276 0xff73, // 0xff95 HALFWIDTH KATAKANA LETTER YU
277 0xff75, // 0xff96 HALFWIDTH KATAKANA LETTER YO
278 0xff71, // 0xff97 HALFWIDTH KATAKANA LETTER RA
279 0xff72, // 0xff98 HALFWIDTH KATAKANA LETTER RI
280 0xff73, // 0xff99 HALFWIDTH KATAKANA LETTER RU
281 0xff74, // 0xff9a HALFWIDTH KATAKANA LETTER RE
282 0xff75, // 0xff9b HALFWIDTH KATAKANA LETTER RO
283 0xff71 // 0xff9c HALFWIDTH KATAKANA LETTER WA
284 // 0x0000, // 0xff9d HALFWIDTH KATAKANA LETTER N
285 // 0x0000, // 0xff9e HALFWIDTH KATAKANA VOICED SOUND MARK
286 // 0x0000 // 0xff9f HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
291 ignoreProlongedSoundMark_ja_JP::foldingImpl( const OUString
& inStr
, sal_Int32 startPos
, sal_Int32 nCount
, Sequence
< sal_Int32
>* pOffset
)
293 // Create a string buffer which can hold nCount + 1 characters.
294 // The reference count is 1 now.
295 rtl_uString
* newStr
= rtl_uString_alloc(nCount
);
296 sal_Unicode
* dst
= newStr
->buffer
;
297 const sal_Unicode
* src
= inStr
.getStr() + startPos
;
300 // Allocate nCount length to offset argument.
301 pOffset
->realloc( nCount
);
302 auto [begin
, end
] = asNonConstRange(*pOffset
);
303 std::iota(begin
, end
, startPos
);
307 sal_Unicode previousChar
= *src
++;
308 sal_Unicode currentChar
;
311 while (-- nCount
> 0) {
312 currentChar
= *src
++;
314 if (currentChar
== 0x30fc || // KATAKANA-HIRAGANA PROLONGED SOUND MARK
315 currentChar
== 0xff70) { // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
317 if (0x3041 <= previousChar
&& previousChar
<= 0x30fa) {
318 currentChar
= table_normalwidth
[ previousChar
- 0x3041 ];
320 else if (0xff66 <= previousChar
&& previousChar
<= 0xff9c) {
321 currentChar
= table_halfwidth
[ previousChar
- 0xff66 ];
325 *dst
++ = previousChar
;
326 previousChar
= currentChar
;
330 *dst
++ = previousChar
;
335 newStr
->length
= sal_Int32(dst
- newStr
->buffer
);
337 pOffset
->realloc(newStr
->length
);
338 return OUString(newStr
, SAL_NO_ACQUIRE
); // take ownership
344 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */