/*************************************************************************
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * Copyright 2008 by Sun Microsystems, Inc.
 *
 * OpenOffice.org - a multi-platform office productivity suite
 *
 * $RCSfile: textconversion_ko.cxx,v $
 * $Revision: 1.15.22.1 $
 *
 * This file is part of OpenOffice.org.
 *
 * OpenOffice.org is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License version 3
 * only, as published by the Free Software Foundation.
 *
 * OpenOffice.org is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License version 3 for more details
 * (a copy is included in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU Lesser General Public License
 * version 3 along with OpenOffice.org. If not, see
 * <http://www.openoffice.org/license.html>
 * for a copy of the LGPLv3 License.
 *
 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_i18npool.hxx"
34 #include <textconversion.hxx>
35 #include <com/sun/star/i18n/TextConversionType.hpp>
36 #include <com/sun/star/i18n/TextConversionOption.hpp>
37 #include <com/sun/star/linguistic2/ConversionDirection.hpp>
38 #include <com/sun/star/linguistic2/ConversionDictionaryType.hpp>
39 #include <rtl/ustrbuf.hxx>
40 #include <i18nutil/x_rtl_ustring.h>
41 #include <unicode/uchar.h>
43 using namespace com::sun::star::lang
;
44 using namespace com::sun::star::i18n
;
45 using namespace com::sun::star::linguistic2
;
46 using namespace com::sun::star::uno
;
49 namespace com
{ namespace sun
{ namespace star
{ namespace i18n
{
51 #define SCRIPT_OTHERS 0
52 #define SCRIPT_HANJA 1
53 #define SCRIPT_HANGUL 2
55 TextConversion_ko::TextConversion_ko( const Reference
< XMultiServiceFactory
>& xMSF
)
57 Reference
< XInterface
> xI
;
59 xI
= xMSF
->createInstance(
60 OUString::createFromAscii("com.sun.star.i18n.ConversionDictionary_ko"));
63 xI
->queryInterface( getCppuType((const Reference
< XConversionDictionary
>*)0) ) >>= xCD
;
65 xI
= xMSF
->createInstance(
66 OUString::createFromAscii( "com.sun.star.linguistic2.ConversionDictionaryList" ));
69 xI
->queryInterface( getCppuType((const Reference
< XConversionDictionaryList
>*)0) ) >>= xCDL
;
71 maxLeftLength
= maxRightLength
= 1;
73 // get maximum length of word in dictionary
75 Locale
loc(OUString::createFromAscii("ko"),
76 OUString::createFromAscii("KR"),
78 maxLeftLength
= xCDL
->queryMaxCharCount(loc
,
79 ConversionDictionaryType::HANGUL_HANJA
,
80 ConversionDirection_FROM_LEFT
);
81 maxRightLength
= xCDL
->queryMaxCharCount(loc
,
82 ConversionDictionaryType::HANGUL_HANJA
,
83 ConversionDirection_FROM_RIGHT
);
85 sal_Int32 tmp
= xCD
->getMaxCharCount(ConversionDirection_FROM_LEFT
);
86 if (tmp
> maxLeftLength
)
88 tmp
= xCD
->getMaxCharCount(ConversionDirection_FROM_RIGHT
);
89 if (tmp
> maxRightLength
)
92 } else if (xCD
.is()) {
93 maxLeftLength
= xCD
->getMaxCharCount(ConversionDirection_FROM_LEFT
);
94 maxRightLength
= xCD
->getMaxCharCount(ConversionDirection_FROM_RIGHT
);
97 implementationName
= "com.sun.star.i18n.TextConversion_ko";
100 sal_Int16 SAL_CALL
checkScriptType(sal_Unicode c
)
108 static UBlock2Script scriptList
[] = {
109 {UBLOCK_HANGUL_JAMO
, UBLOCK_HANGUL_JAMO
, SCRIPT_HANGUL
},
110 {UBLOCK_CJK_RADICALS_SUPPLEMENT
, UBLOCK_BOPOMOFO
, SCRIPT_HANJA
},
111 {UBLOCK_HANGUL_COMPATIBILITY_JAMO
, UBLOCK_HANGUL_COMPATIBILITY_JAMO
, SCRIPT_HANGUL
},
112 {UBLOCK_KANBUN
, UBLOCK_YI_RADICALS
, SCRIPT_HANJA
},
113 {UBLOCK_HANGUL_SYLLABLES
, UBLOCK_HANGUL_SYLLABLES
, SCRIPT_HANGUL
},
114 {UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS
, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS
, SCRIPT_HANJA
},
115 {UBLOCK_COMBINING_HALF_MARKS
, UBLOCK_SMALL_FORM_VARIANTS
, SCRIPT_HANJA
},
116 {UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS
, UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS
, SCRIPT_HANJA
},
119 #define scriptListCount sizeof (scriptList) / sizeof (UBlock2Script)
121 UBlockCode block
=ublock_getCode((sal_uInt32
) c
);
123 for ( i
= 0; i
< scriptListCount
; i
++) {
124 if (block
<= scriptList
[i
].to
) break;
126 return (i
< scriptListCount
&& block
>= scriptList
[i
].from
) ? scriptList
[i
].script
: SCRIPT_OTHERS
;
129 Sequence
< OUString
> SAL_CALL
130 TextConversion_ko::getCharConversions(const OUString
& aText
, sal_Int32 nStartPos
, sal_Int32 nLength
, sal_Bool toHanja
)
133 Sequence
< OUString
> output
;
134 const sal_Unicode
* (*getHangul2HanjaData
)() = (const sal_Unicode
* (*)())getFunctionBySymbol("getHangul2HanjaData");
135 const Hangul_Index
* (*getHangul2HanjaIndex
)() = (const Hangul_Index
* (*)()) getFunctionBySymbol("getHangul2HanjaIndex");
136 sal_Int16 (*getHangul2HanjaIndexCount
)() = (sal_Int16 (*)()) getFunctionBySymbol("getHangul2HanjaIndexCount");
137 const sal_uInt16
* (*getHanja2HangulIndex
)() = (const sal_uInt16
* (*)()) getFunctionBySymbol("getHanja2HangulIndex");
138 const sal_Unicode
* (*getHanja2HangulData
)() = (const sal_Unicode
* (*)()) getFunctionBySymbol("getHanja2HangulData");
139 if (toHanja
&& getHangul2HanjaIndex
&& getHangul2HanjaIndexCount
&& getHangul2HanjaData
) {
140 ch
= aText
[nStartPos
];
141 const Hangul_Index
*Hangul_ko
= getHangul2HanjaIndex();
142 sal_Int16 top
= getHangul2HanjaIndexCount();
144 sal_Int16 bottom
= 0;
146 while (bottom
<= top
) {
147 sal_Int16 current
= (top
+ bottom
) / 2;
148 sal_Unicode current_ch
= Hangul_ko
[current
].code
;
151 else if (ch
> current_ch
)
152 bottom
= current
+ 1;
154 const sal_Unicode
*ptr
= getHangul2HanjaData() + Hangul_ko
[current
].address
;
155 sal_Int16 count
= Hangul_ko
[current
].count
;
156 output
.realloc(count
);
157 for (sal_Int16 i
= 0; i
< count
; i
++)
158 output
[i
] = OUString(ptr
+ i
, 1);
162 } else if (! toHanja
&& getHanja2HangulIndex
&& getHanja2HangulData
) {
163 rtl_uString
* newStr
= x_rtl_uString_new_WithLength( nLength
); // defined in x_rtl_ustring.h
165 while (count
< nLength
) {
166 ch
= aText
[nStartPos
+ count
];
167 sal_Unicode address
= getHanja2HangulIndex()[ch
>>8];
168 if (address
!= 0xFFFF)
169 address
= getHanja2HangulData()[address
+ (ch
& 0xFF)];
171 if (address
!= 0xFFFF)
172 newStr
->buffer
[count
++] = address
;
178 output
[0] = OUString( newStr
->buffer
, count
);
184 static Sequence
< OUString
>& operator += (Sequence
< OUString
> &rSeq1
, Sequence
< OUString
> &rSeq2
)
186 if (! rSeq1
.hasElements() && rSeq2
.hasElements())
188 else if (rSeq2
.hasElements()) {
189 sal_Int32 i
, j
, k
, l
;
190 k
= l
= rSeq1
.getLength();
191 rSeq1
.realloc(l
+ rSeq2
.getLength());
193 for (i
= 0; i
< rSeq2
.getLength(); i
++) {
194 for (j
= 0; j
< l
; j
++)
195 if (rSeq1
[j
] == rSeq2
[i
])
198 rSeq1
[k
++] = rSeq2
[i
];
200 if (rSeq1
.getLength() > k
)
206 TextConversionResult SAL_CALL
207 TextConversion_ko::getConversions( const OUString
& aText
, sal_Int32 nStartPos
, sal_Int32 nLength
,
208 const Locale
& aLocale
, sal_Int16 nConversionType
, sal_Int32 nConversionOptions
)
209 throw( RuntimeException
, IllegalArgumentException
, NoSupportException
)
211 TextConversionResult result
;
212 Sequence
<OUString
> candidates
;
213 result
.Boundary
.startPos
= result
.Boundary
.endPos
= 0;
215 // do conversion only when there are right conversion type and dictionary services.
216 if (nConversionType
== TextConversionType::TO_HANGUL
||
217 nConversionType
== TextConversionType::TO_HANJA
) {
218 sal_Int32 start
, end
, length
= aText
.getLength() - nStartPos
;
220 if (length
< 0 || nStartPos
< 0)
222 else if (length
> nLength
)
225 sal_Int16 scriptType
= SCRIPT_OTHERS
;
227 sal_Bool toHanja
= (nConversionType
== TextConversionType::TO_HANJA
);
228 // FROM_LEFT: Hangul -> Hanja
229 // FROM_RIGHT: Hanja -> Hangul
230 ConversionDirection eDirection
= toHanja
? ConversionDirection_FROM_LEFT
: ConversionDirection_FROM_RIGHT
;
231 sal_Int32 maxLength
= toHanja
? maxLeftLength
: maxRightLength
;
232 if (maxLength
== 0) maxLength
= 1;
234 // search for a max length of convertible text
235 for (start
= 0, end
= 0; start
< length
; start
++) {
237 scriptType
= checkScriptType(aText
[nStartPos
+ start
]);
238 if (nConversionType
== TextConversionType::TO_HANJA
) {
239 if (scriptType
!= SCRIPT_HANGUL
) // skip non-Hangul characters
242 if (scriptType
!= SCRIPT_HANJA
) // skip non-Hanja characters
247 if (nConversionOptions
& TextConversionOption::CHARACTER_BY_CHARACTER
) {
248 result
.Candidates
= getCharConversions(aText
, nStartPos
+ start
, len
, toHanja
); // char2char conversion
250 for (; end
< length
&& end
- start
< maxLength
; end
++)
251 if (checkScriptType(aText
[nStartPos
+ end
]) != scriptType
)
254 for (len
= end
- start
; len
> 0; len
--) {
258 result
.Candidates
= xCDL
->queryConversions(aText
, start
+ nStartPos
, len
,
259 aLocale
, ConversionDictionaryType::HANGUL_HANJA
, eDirection
, nConversionOptions
); // user dictionary
261 catch ( NoSupportException
& ) {
262 // clear reference (when there is no user dictionary) in order
263 // to not always have to catch this exception again
264 // in further calls. (save time)
268 // catch all other exceptions to allow
269 // querying the system dictionary in the next line
271 if (xCD
.is() && toHanja
) { // System dictionary would not do Hanja_to_Hangul conversion.
272 candidates
= xCD
->getConversions(aText
, start
+ nStartPos
, len
, eDirection
, nConversionOptions
);
273 result
.Candidates
+= candidates
;
275 } else if (! toHanja
) { // do whole word character 2 character conversion for Hanja to Hangul conversion
276 result
.Candidates
= getCharConversions(aText
, nStartPos
+ start
, length
- start
, toHanja
);
277 if (result
.Candidates
.hasElements())
278 len
= result
.Candidates
[0].getLength();
280 if (result
.Candidates
.hasElements())
285 if (result
.Candidates
.hasElements()) {
286 result
.Boundary
.startPos
= start
+ nStartPos
;;
287 result
.Boundary
.endPos
= start
+ len
+ nStartPos
;
292 throw NoSupportException(); // Conversion type is not supported in this service.
297 TextConversion_ko::getConversion( const OUString
& aText
, sal_Int32 nStartPos
, sal_Int32 nLength
,
298 const Locale
& aLocale
, sal_Int16 nConversionType
, sal_Int32 nConversionOptions
)
299 throw( RuntimeException
, IllegalArgumentException
, NoSupportException
)
301 sal_Int32 length
= aText
.getLength() - nStartPos
;
303 if (length
<= 0 || nStartPos
< 0)
305 else if (length
> nLength
)
308 OUStringBuffer
aBuf(length
+ 1);
309 TextConversionResult result
;
310 const sal_Unicode
*str
= aText
.getStr();
312 for (sal_Int32 start
= nStartPos
; length
+ nStartPos
> start
; start
= result
.Boundary
.endPos
) {
314 result
= getConversions(aText
, start
, length
+ nStartPos
- start
, aLocale
, nConversionType
, nConversionOptions
);
316 if (result
.Boundary
.endPos
> 0) {
317 if (result
.Boundary
.startPos
> start
)
318 aBuf
.append(str
+ start
, result
.Boundary
.startPos
- start
); // append skip portion
319 aBuf
.append(result
.Candidates
[0]); // append converted portion
321 if (length
+ nStartPos
> start
)
322 aBuf
.append(str
+ start
, length
+ nStartPos
- start
); // append last portion
327 return aBuf
.makeStringAndClear();
331 TextConversion_ko::getConversionWithOffset( const OUString
& aText
, sal_Int32 nStartPos
, sal_Int32 nLength
,
332 const Locale
& rLocale
, sal_Int16 nConversionType
, sal_Int32 nConversionOptions
, Sequence
<sal_Int32
>& offset
)
333 throw( RuntimeException
, IllegalArgumentException
, NoSupportException
)
336 return getConversion(aText
, nStartPos
, nLength
, rLocale
, nConversionType
, nConversionOptions
);
340 TextConversion_ko::interactiveConversion( const Locale
& /*rLocale*/, sal_Int16
/*nTextConversionType*/, sal_Int32
/*nTextConversionOptions*/ )
341 throw( RuntimeException
, IllegalArgumentException
, NoSupportException
)