1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <i18nutil/oneToOneMapping.hxx>
22 #include <transliteration_Ignore.hxx>
24 using namespace com::sun::star::uno
;
25 using namespace com::sun::star::lang
;
28 namespace com
{ namespace sun
{ namespace star
{ namespace i18n
{
// Pairs of { base character, voiced counterpart } for hiragana, katakana and
// the two kana iteration marks, as labelled on each row.
// folding() below wraps this array in a oneToOneMapping and indexes it with
// the character that precedes a voiced iteration mark (U+309E / U+30FE).
// Rows are listed in ascending order of the first code point; presumably the
// lookup in oneToOneMapping relies on that ordering — verify before
// inserting or reordering rows.
// NOTE(review): the terminator of this initializer is not visible in this
// excerpt — confirm against the full file.
30 OneToOneMappingTable_t ignoreIterationMark_ja_JP_mappingTable
[] = {
31 { 0x3046, 0x3094 }, // HIRAGANA LETTER U --> HIRAGANA LETTER VU
32 { 0x304B, 0x304C }, // HIRAGANA LETTER KA --> HIRAGANA LETTER GA
33 { 0x304D, 0x304E }, // HIRAGANA LETTER KI --> HIRAGANA LETTER GI
34 { 0x304F, 0x3050 }, // HIRAGANA LETTER KU --> HIRAGANA LETTER GU
35 { 0x3051, 0x3052 }, // HIRAGANA LETTER KE --> HIRAGANA LETTER GE
36 { 0x3053, 0x3054 }, // HIRAGANA LETTER KO --> HIRAGANA LETTER GO
37 { 0x3055, 0x3056 }, // HIRAGANA LETTER SA --> HIRAGANA LETTER ZA
38 { 0x3057, 0x3058 }, // HIRAGANA LETTER SI --> HIRAGANA LETTER ZI
39 { 0x3059, 0x305A }, // HIRAGANA LETTER SU --> HIRAGANA LETTER ZU
40 { 0x305B, 0x305C }, // HIRAGANA LETTER SE --> HIRAGANA LETTER ZE
41 { 0x305D, 0x305E }, // HIRAGANA LETTER SO --> HIRAGANA LETTER ZO
42 { 0x305F, 0x3060 }, // HIRAGANA LETTER TA --> HIRAGANA LETTER DA
43 { 0x3061, 0x3062 }, // HIRAGANA LETTER TI --> HIRAGANA LETTER DI
44 { 0x3064, 0x3065 }, // HIRAGANA LETTER TU --> HIRAGANA LETTER DU
45 { 0x3066, 0x3067 }, // HIRAGANA LETTER TE --> HIRAGANA LETTER DE
46 { 0x3068, 0x3069 }, // HIRAGANA LETTER TO --> HIRAGANA LETTER DO
47 { 0x306F, 0x3070 }, // HIRAGANA LETTER HA --> HIRAGANA LETTER BA
48 { 0x3072, 0x3073 }, // HIRAGANA LETTER HI --> HIRAGANA LETTER BI
49 { 0x3075, 0x3076 }, // HIRAGANA LETTER HU --> HIRAGANA LETTER BU
50 { 0x3078, 0x3079 }, // HIRAGANA LETTER HE --> HIRAGANA LETTER BE
51 { 0x307B, 0x307C }, // HIRAGANA LETTER HO --> HIRAGANA LETTER BO
52 { 0x309D, 0x309E }, // HIRAGANA ITERATION MARK --> HIRAGANA VOICED ITERATION MARK
53 { 0x30A6, 0x30F4 }, // KATAKANA LETTER U --> KATAKANA LETTER VU
54 { 0x30AB, 0x30AC }, // KATAKANA LETTER KA --> KATAKANA LETTER GA
55 { 0x30AD, 0x30AE }, // KATAKANA LETTER KI --> KATAKANA LETTER GI
56 { 0x30AF, 0x30B0 }, // KATAKANA LETTER KU --> KATAKANA LETTER GU
57 { 0x30B1, 0x30B2 }, // KATAKANA LETTER KE --> KATAKANA LETTER GE
58 { 0x30B3, 0x30B4 }, // KATAKANA LETTER KO --> KATAKANA LETTER GO
59 { 0x30B5, 0x30B6 }, // KATAKANA LETTER SA --> KATAKANA LETTER ZA
60 { 0x30B7, 0x30B8 }, // KATAKANA LETTER SI --> KATAKANA LETTER ZI
61 { 0x30B9, 0x30BA }, // KATAKANA LETTER SU --> KATAKANA LETTER ZU
62 { 0x30BB, 0x30BC }, // KATAKANA LETTER SE --> KATAKANA LETTER ZE
63 { 0x30BD, 0x30BE }, // KATAKANA LETTER SO --> KATAKANA LETTER ZO
64 { 0x30BF, 0x30C0 }, // KATAKANA LETTER TA --> KATAKANA LETTER DA
65 { 0x30C1, 0x30C2 }, // KATAKANA LETTER TI --> KATAKANA LETTER DI
66 { 0x30C4, 0x30C5 }, // KATAKANA LETTER TU --> KATAKANA LETTER DU
67 { 0x30C6, 0x30C7 }, // KATAKANA LETTER TE --> KATAKANA LETTER DE
68 { 0x30C8, 0x30C9 }, // KATAKANA LETTER TO --> KATAKANA LETTER DO
69 { 0x30CF, 0x30D0 }, // KATAKANA LETTER HA --> KATAKANA LETTER BA
70 { 0x30D2, 0x30D3 }, // KATAKANA LETTER HI --> KATAKANA LETTER BI
71 { 0x30D5, 0x30D6 }, // KATAKANA LETTER HU --> KATAKANA LETTER BU
72 { 0x30D8, 0x30D9 }, // KATAKANA LETTER HE --> KATAKANA LETTER BE
73 { 0x30DB, 0x30DC }, // KATAKANA LETTER HO --> KATAKANA LETTER BO
74 { 0x30EF, 0x30F7 }, // KATAKANA LETTER WA --> KATAKANA LETTER VA
75 { 0x30F0, 0x30F8 }, // KATAKANA LETTER WI --> KATAKANA LETTER VI
76 { 0x30F1, 0x30F9 }, // KATAKANA LETTER WE --> KATAKANA LETTER VE
77 { 0x30F2, 0x30FA }, // KATAKANA LETTER WO --> KATAKANA LETTER VO
78 { 0x30FD, 0x30FE } // KATAKANA ITERATION MARK --> KATAKANA VOICED ITERATION MARK
// Replaces Japanese iteration marks in a substring of inStr and returns the
// result as a newly allocated OUString.
//
//   inStr    - source string; reading starts at inStr.getStr() + startPos.
//   startPos - index of the first character to process.
//   nCount   - number of characters to process; also used as the size of the
//              output buffer and the initial size of offset.
//   offset   - resized to nCount up front and to the result length at the end.
//
// Visible substitutions:
//   U+30FD / U+309D / U+3005  -> the preceding character
//   U+30FE / U+309E           -> aTable[preceding character], i.e. the voiced
//                                counterpart from the mapping table above
// The substituted character becomes the new "previous" character, so a run of
// marks keeps resolving against the last substituted value.
//
// NOTE(review): several lines of the original (return type, braces, the
// declaration of p, any break statements and any writes through p) are not
// visible in this excerpt — confirm details against the full file.
83 ignoreIterationMark_ja_JP::folding( const OUString
& inStr
, sal_Int32 startPos
, sal_Int32 nCount
, Sequence
< sal_Int32
>& offset
)
84 throw(RuntimeException
, std::exception
)
// Wrap the pair table for lookup; the second argument is the table's size in
// bytes (sizeof of the whole array), not its element count.
86 oneToOneMapping
aTable(ignoreIterationMark_ja_JP_mappingTable
, sizeof(ignoreIterationMark_ja_JP_mappingTable
));
88 // Create a string buffer which can hold nCount + 1 characters.
89 // The reference count is 1 now.
90 rtl_uString
* newStr
= rtl_uString_alloc(nCount
);
// dst is the write cursor into the new buffer; src is the read cursor into
// the input, starting at startPos.
91 sal_Unicode
* dst
= newStr
->buffer
;
92 const sal_Unicode
* src
= inStr
.getStr() + startPos
;
95 sal_Int32 position
= 0;
97 // Allocate nCount length to offset argument.
98 offset
.realloc( nCount
);
// NOTE(review): p is assigned here but its declaration, and any code that
// fills the offset array through it, is not visible in this excerpt.
99 p
= offset
.getArray();
// Prime the one-character look-behind: the first character is consumed
// unconditionally, before nCount is tested.
104 sal_Unicode previousChar
= *src
++;
105 sal_Unicode currentChar
;
// Pre-decrement accounts for the character already consumed above, so the
// loop body runs for the remaining nCount - 1 characters.
108 while (-- nCount
> 0) {
109 currentChar
= *src
++;
111 switch ( currentChar
) {
// Plain iteration marks repeat the preceding character unchanged.
112 case 0x30fd: // KATAKANA ITERATION MARK
113 case 0x309d: // HIRAGANA ITERATION MARK
114 case 0x3005: // IDEOGRAPHIC ITERATION MARK
115 currentChar
= previousChar
;
// NOTE(review): no break is visible between the two case groups in this
// excerpt; if it is really absent, the assignment below overwrites the one
// above — confirm against the full file.
// Voiced iteration marks repeat the preceding character in its voiced form.
117 case 0x30fe: // KATAKANA VOICED ITERATION MARK
118 case 0x309e: // HIRAGANA VOICED ITERATION MARK
119 currentChar
= aTable
[ previousChar
];
// Output lags input by one character: emit the pending character, then keep
// the (possibly substituted) current one as the next pending character.
124 *dst
++ = previousChar
;
125 previousChar
= currentChar
;
// Emit the last pending character.
131 *dst
++ = previousChar
;
// Terminate the buffer with a NUL character.
134 *dst
= (sal_Unicode
) 0;
// Record the number of characters written (terminator excluded, since dst
// was not advanced past it).
136 newStr
->length
= sal_Int32(dst
- newStr
->buffer
);
// Resize offset to match the length of the result.
138 offset
.realloc(newStr
->length
);
139 return OUString(newStr
, SAL_NO_ACQUIRE
); // take ownership
145 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */