1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: widthfolding.cxx,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // prevent internal compiler error with MSVC6SP3
33 #include <i18nutil/widthfolding.hxx>
34 #include <i18nutil/x_rtl_ustring.h>
35 #include "widthfolding_data.h"
37 using namespace com::sun::star::uno
;
40 namespace com
{ namespace sun
{ namespace star
{ namespace i18n
{
// Single-character helper related to Katakana voiced-sound-mark decomposition.
// Visible logic: when inChar lies in U+30A0..U+30FF, the entry
// decomposition_table[inChar - 0x3040] is consulted and its
// decomposited_character_1 field is tested for being non-zero.
// NOTE(review): this listing is an extraction with the original line numbers
// embedded; the original lines carrying the braces and the return statements
// are not present, so the returned values cannot be documented from here.
42 sal_Unicode
widthfolding::decompose_ja_voiced_sound_marksChar2Char (sal_Unicode inChar
)
// Only code points inside U+30A0..U+30FF are looked up.
44 if (0x30a0 <= inChar
&& inChar
<= 0x30ff) {
// The table index is the offset of the character from U+3040.
45 sal_Int16 i
= inChar
- 0x3040;
// A non-zero first field presumably means the table holds a decomposition
// for this character (the statement guarded by this test is not visible).
46 if (decomposition_table
[i
].decomposited_character_1
)
53 * Decompose Japanese specific voiced and semi-voiced sound marks.
// Decomposes Katakana characters that have an entry in decomposition_table
// (e.g. GA --> KA + voice-mark) and returns the result as a new OUString.
//   inStr     - source string
//   startPos  - index of the first character of inStr that is processed
//   nCount    - number of characters to process
//   offset    - sequence that the visible code reallocates (first to
//               nCount * 2, finally to the length of the result)
//   useOffset - NOTE(review): no statement testing this flag is present in
//               this extract; presumably it guards the offset bookkeeping --
//               confirm against the complete file.
// NOTE(review): this listing has the original line numbers embedded and
// several original lines missing (e.g. the declarations of newStr and p and
// part of the loop body), so only the visible statements are described.
55 OUString
widthfolding::decompose_ja_voiced_sound_marks (const OUString
& inStr
, sal_Int32 startPos
, sal_Int32 nCount
, Sequence
< sal_Int32
>& offset
, sal_Bool useOffset
)
57 // Create a string buffer which can hold nCount * 2 + 1 characters.
58 // Its size may become double of nCount.
60 x_rtl_uString_new_WithLength( &newStr
, nCount
* 2 ); // defined in x_rtl_ustring.h The reference count is 0 now.
63 sal_Int32 position
= 0;
65 // Allocate double of nCount length to offset argument.
66 offset
.realloc( nCount
* 2 );
67 p
= offset
.getArray();
71 // Prepare pointers of unicode character arrays.
72 const sal_Unicode
* src
= inStr
.getStr() + startPos
;
73 sal_Unicode
* dst
= newStr
->buffer
;
75 // Decomposition: GA --> KA + voice-mark
// One iteration per source character; nCount is counted down to zero.
76 while (nCount
-- > 0) {
77 sal_Unicode c
= *src
++;
78 // see http://charts.unicode.org/Web/U3040.html Hiragana (U+3040..U+309F)
79 // see http://charts.unicode.org/Web/U30A0.html Katakana (U+30A0..U+30FF)
80 // Hiragana is not applied to decomposition.
81 // Only Katakana is applied to decomposition
82 if (0x30a0 <= c
&& c
<= 0x30ff) {
// The table index is the offset of the character from U+3040.
83 int i
= int(c
- 0x3040);
84 sal_Unicode first
= decomposition_table
[i
].decomposited_character_1
;
// A zero first field means there is nothing to emit from the table.
85 if (first
!= 0x0000) {
// NOTE(review): the statement that stores `first` is not visible here;
// only the store of the second table field is.
87 *dst
++ = decomposition_table
[i
].decomposited_character_2
; // second
// Terminate the output buffer with a NUL character.
99 *dst
= (sal_Unicode
) 0;
// The final length is the number of characters written to the buffer.
101 newStr
->length
= sal_Int32(dst
- newStr
->buffer
);
// Shrink offset to the length of the produced string.
103 offset
.realloc(newStr
->length
);
104 return OUString( newStr
); // defined in rtl/ustring. The reference count is increased from 0 to 1.
// Accessor returning a oneToOneMapping reference for the full2half data.
// The visible statement constructs a function-local static
// oneToOneMappingWithFlag from full2half, sizeof(full2half) and the
// FULL2HALF_NORMAL flag.
// NOTE(review): the original lines following the construction (including the
// return statement) are missing from this extract.
107 oneToOneMapping
& widthfolding::getfull2halfTable(void)
109 static oneToOneMappingWithFlag
table(full2half
, sizeof(full2half
), FULL2HALF_NORMAL
);
115 * Compose Japanese specific voiced and semi-voiced sound marks.
// Composes a base character followed by a voiced or semi-voiced sound mark
// into the single character given by composition_table (KA + voice-mark -->
// GA) and returns the result as a new OUString.
//   inStr     - source string
//   startPos  - index of the first character of inStr that is processed
//   nCount    - number of characters to process
//   offset    - sequence that the visible code reallocates (first to nCount,
//               finally to the length of the result)
//   useOffset - NOTE(review): no statement testing this flag is present in
//               this extract; presumably it guards the offset bookkeeping --
//               confirm against the complete file.
//   nFlags    - bit flags; WIDTHFOLDNIG_DONT_USE_COMBINED_VU disables
//               composition when the base character is 0x30a6
// NOTE(review): this listing has the original line numbers embedded and
// several original lines missing (braces, the declaration of p and a number
// of guarded statements), so only the visible statements are described.
117 OUString
widthfolding::compose_ja_voiced_sound_marks (const OUString
& inStr
, sal_Int32 startPos
, sal_Int32 nCount
, Sequence
< sal_Int32
>& offset
, sal_Bool useOffset
, sal_Int32 nFlags
)
119 // Create a string buffer which can hold nCount + 1 characters.
120 // Its size may become equal to nCount or smaller.
121 // The reference count is 0 now.
122 rtl_uString
* newStr
= x_rtl_uString_new_WithLength( nCount
); // defined in x_rtl_ustring.h
124 // Prepare pointers of unicode character arrays.
125 const sal_Unicode
* src
= inStr
.getStr() + startPos
;
126 sal_Unicode
* dst
= newStr
->buffer
;
128 // This conversion algorithm requires at least one character.
131 // .. .. KA VOICE .. ..
133 // previousChar currentChar
137 // will be converted to
141 sal_Int32 position
= 0;
143 // Allocate nCount length to offset argument.
144 offset
.realloc( nCount
);
145 p
= offset
.getArray();
// Prime the one-character look-behind with the first source character.
150 sal_Unicode previousChar
= *src
++;
151 sal_Unicode currentChar
;
153 // Composition: KA + voice-mark --> GA
// Pre-decrement: the first character has already been consumed above.
154 while (-- nCount
> 0) {
155 currentChar
= *src
++;
156 // see http://charts.unicode.org/Web/U3040.html Hiragana (U+3040..U+309F)
157 // see http://charts.unicode.org/Web/U30A0.html Katakana (U+30A0..U+30FF)
158 // 0x3099 COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
159 // 0x309a COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
160 // 0x309b KATAKANA-HIRAGANA VOICED SOUND MARK
161 // 0x309c KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
162 int j
= currentChar
- 0x3099; // 0x3099, 0x309a, 0x309b, 0x309c ?
// NOTE(review): the statement guarded by this test is missing from the
// extract; presumably it folds 2..3 onto 0..1 (see the comment below).
164 if (2 <= j
&& j
<= 3) // 0x309b or 0x309c
167 if (0 <= j
&& j
<= 1) {
168 // 0 addresses a code point regarding 0x3099 or 0x309b (voiced sound mark),
169 // 1 is 0x309a or 0x309c (semi-voiced sound mark)
170 int i
= int(previousChar
- 0x3040); // i acts as an index of array
171 sal_Bool bCompose
= sal_False
;
// Range check on i before indexing; a zero table entry counts as "no
// composition". NOTE(review): the guarded statement is not visible here.
173 if (0 <= i
&& i
<= (0x30ff - 0x3040) && composition_table
[i
][j
])
176 // not to use combined KATAKANA LETTER VU
177 if ( previousChar
== 0x30a6 && (nFlags
& WIDTHFOLDNIG_DONT_USE_COMBINED_VU
) )
178 bCompose
= sal_False
;
// Emit the composed character and load the next source character as the new
// look-behind. NOTE(review): the condition guarding these two statements is
// missing from the extract.
185 *dst
++ = composition_table
[i
][j
];
186 previousChar
= *src
++;
// No composition: emit the look-behind character and advance it.
193 *dst
++ = previousChar
;
194 previousChar
= currentChar
;
// Emit the remaining look-behind character. NOTE(review): the condition
// guarding this statement is missing from the extract.
200 *dst
++ = previousChar
;
// Terminate the output buffer with a NUL character.
203 *dst
= (sal_Unicode
) 0;
// The final length is the number of characters written to the buffer.
205 newStr
->length
= sal_Int32(dst
- newStr
->buffer
);
// Shrink offset to the length of the produced string.
208 offset
.realloc(newStr
->length
);
209 return OUString( newStr
); // defined in rtl/ustring. The reference count is increased from 0 to 1.
// Accessor returning a oneToOneMapping reference for the half2full data.
// The visible statement constructs a function-local static
// oneToOneMappingWithFlag from half2full, sizeof(half2full) and the
// HALF2FULL_NORMAL flag.
// NOTE(review): the original lines following the construction (including the
// return statement) are missing from this extract.
212 oneToOneMapping
& widthfolding::gethalf2fullTable(void)
214 static oneToOneMappingWithFlag
table(half2full
, sizeof(half2full
), HALF2FULL_NORMAL
);
219 sal_Unicode
widthfolding::getCompositionChar(sal_Unicode c1
, sal_Unicode c2
)
221 return composition_table
[c1
- 0x3040][c2
- 0x3099];
// Accessor returning a oneToOneMapping reference for the full2half data built
// with the FULL2HALF_ASC_FUNCTION flag, with the entries of
// full2halfASCException patched into the table's two-level index (mpIndex).
// NOTE(review): this listing has the original line numbers embedded and
// several original lines missing (braces, the declarations of i, j, high and
// low, whatever follows the construction, and the return statement).
225 oneToOneMapping
& widthfolding::getfull2halfTableForASC()
227 static oneToOneMappingWithFlag
table(full2half
, sizeof(full2half
), FULL2HALF_ASC_FUNCTION
);
230 // bluedwarf: dirty hack!
231 // There is an exception. Additional conversion is required following:
232 // 0xFFE5 (FULLWIDTH YEN SIGN) --> 0x005C (REVERSE SOLIDUS)
234 // See the following page for detail:
235 // http://wiki.services.openoffice.org/wiki/Calc/Features/JIS_and_ASC_functions
// Number of entries in the exception array.
237 int n
= sizeof(full2halfASCException
) / sizeof(UnicodePairWithFlag
);
238 for( i
= 0; i
< n
; i
++ )
// Split the source code point of the exception into its high and low byte;
// the two bytes are the first- and second-level index into mpIndex.
240 high
= (full2halfASCException
[i
].first
>> 8) & 0xFF;
241 low
= (full2halfASCException
[i
].first
) & 0xFF;
// Create the 256-entry second-level row on demand and clear it.
243 if( !table
.mpIndex
[high
] )
245 table
.mpIndex
[high
] = new UnicodePairWithFlag
*[256];
247 for( j
= 0; j
< 256; j
++ )
248 table
.mpIndex
[high
][j
] = NULL
;
// Point the index slot at the exception entry.
250 table
.mpIndex
[high
][low
] = &full2halfASCException
[i
];
// Accessor returning a oneToOneMapping reference for the half2full data built
// with the HALF2FULL_JIS_FUNCTION flag, with the entries of
// half2fullJISException patched into the table's two-level index (mpIndex).
// NOTE(review): this listing has the original line numbers embedded and
// several original lines missing (braces, the declarations of i, j, high and
// low, whatever follows the construction, and the return statement).
256 oneToOneMapping
& widthfolding::gethalf2fullTableForJIS()
258 static oneToOneMappingWithFlag
table(half2full
, sizeof(half2full
), HALF2FULL_JIS_FUNCTION
);
261 // bluedwarf: dirty hack!
262 // There are some exceptions. The following additional conversions are required:
263 // 0x0022 (QUOTATION MARK) --> 0x201D (RIGHT DOUBLE QUOTATION MARK)
264 // 0x0027 (APOSTROPHE) --> 0x2019 (RIGHT SINGLE QUOTATION MARK)
265 // 0x005C (REVERSE SOLIDUS) --> 0xFFE5 (FULLWIDTH YEN SIGN)
266 // 0x0060 (GRAVE ACCENT) --> 0x2018 (LEFT SINGLE QUOTATION MARK)
268 // See the following page for detail:
269 // http://wiki.services.openoffice.org/wiki/Calc/Features/JIS_and_ASC_functions
// Number of entries in the exception array.
271 int n
= sizeof(half2fullJISException
) / sizeof(UnicodePairWithFlag
);
272 for( i
= 0; i
< n
; i
++ )
// Split the source code point of the exception into its high and low byte;
// the two bytes are the first- and second-level index into mpIndex.
274 high
= (half2fullJISException
[i
].first
>> 8) & 0xFF;
275 low
= (half2fullJISException
[i
].first
) & 0xFF;
// Create the 256-entry second-level row on demand and clear it.
277 if( !table
.mpIndex
[high
] )
279 table
.mpIndex
[high
] = new UnicodePairWithFlag
*[256];
281 for( j
= 0; j
< 256; j
++ )
282 table
.mpIndex
[high
][j
] = NULL
;
// Point the index slot at the exception entry.
284 table
.mpIndex
[high
][low
] = &half2fullJISException
[i
];
// Accessor returning a oneToOneMapping reference for the full2half data
// restricted by the FULL2HALF_KATAKANA_ONLY flag.
// The visible statement constructs a function-local static
// oneToOneMappingWithFlag from full2half, sizeof(full2half) and that flag.
// NOTE(review): the original lines following the construction (including the
// return statement) are missing from this extract.
290 oneToOneMapping
& widthfolding::getfullKana2halfKanaTable()
292 static oneToOneMappingWithFlag
table(full2half
, sizeof(full2half
), FULL2HALF_KATAKANA_ONLY
);
// Accessor returning a oneToOneMapping reference for the half2full data
// restricted by the HALF2FULL_KATAKANA_ONLY flag.
// The visible statement constructs a function-local static
// oneToOneMappingWithFlag from half2full, sizeof(half2full) and that flag.
// NOTE(review): the original lines following the construction (including the
// return statement) are missing from this extract.
297 oneToOneMapping
& widthfolding::gethalfKana2fullKanaTable()
299 static oneToOneMappingWithFlag
table(half2full
, sizeof(half2full
), HALF2FULL_KATAKANA_ONLY
);