1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
19 #ifndef __com_sun_star_i18n_XTransliteration_idl__
20 #define __com_sun_star_i18n_XTransliteration_idl__
22 #include
<com
/sun
/star
/lang
/Locale.idl
>
23 #include
<com
/sun
/star
/uno
/XInterface.idl
>
24 #include
<com
/sun
/star
/i18n
/TransliterationModules.idl
>
25 #include
<com
/sun
/star
/i18n
/TransliterationModulesNew.idl
>
28 module com
{ module sun
{ module star
{ module i18n
{
32 Character conversions like case folding or Hiragana to Katakana.
34 <p> Transliteration is a character to character conversion but it is
35 not always a one to one mapping between characters. Transliteration
36 modules are primarily used by collation, and search and replace
37 modules to perform approximate search. It can also be used to format
38 the numbers in different numbering systems. </p>
40 <p> In order to select transliteration modules for different
41 purposes, they are classified with attributes of
42 TransliterationType. </p>
44 <p> For Western languages there would be three transliteration
45 modules available to compare two mixed case strings: upper to lower,
46 lower to upper, and ignore case. </p>
48 <p> A typical calling sequence of transliteration is
50 <li> getAvailableModules() </li>
51 <li> loadModulesByImplNames() </li>
56 <li> loadModule() </li>
57 <li> transliterate() </li>
 * All the IGNORE-type functionalities (Range, equals) are based on mapping,
 * except the equals() method in IGNORE_CASE, which is locale-independent.
 * (This second assumption is very complicated and may cause confusion of use.)
71 * We are assuming Upper to Lower mapping as one of transliteration.
72 * The mapping depends on Locale.
73 * Upper <-> Lower methods are just wrappers to provide equals() and Range()
76 * equals() in IGNORE_CASE module is locale-independent and
77 * we don't provide locale-sensitive ones.
78 * The reason we provided locale-independent ones is that IGNORE_CASE is mainly
79 * dedicated to StarOffice internal code.
82 * TransliterationModules is used just for convenience without calling
 * The implementation name in the methods below is not the same as
 * the true implementation name registered.
 * In particular, for the generic modules "UPPERCASE_LOWERCASE",
 * "LOWERCASE_UPPERCASE" and "IGNORE_CASE", there is no registered name.
93 published
interface XTransliteration
: com
::sun
::star
::uno
::XInterface
/** Unique ASCII name to identify a module. This name is used
    to get its localized name for menus, dialogs etc. The behavior
    is undefined for TransliterationType::CASCADE.
 */
/** Return the attribute(s) associated with this transliteration
    object, as defined in TransliterationType. The
    value is determined by the transliteration modules. For example,
    for UPPERCASE_LOWERCASE, a ONE_TO_ONE is returned, for
    IGNORE_CASE, IGNORE is returned.
 */
/** Load an instance of a predefined module - old style method.

    @param eModType
        The predefined module to load, given as a
        TransliterationModules value.

    @param aLocale
        The locale for which the module is requested.
 */
void loadModule(
        [in] TransliterationModules eModType,
        [in] ::com::sun::star::lang::Locale aLocale );
/** Load a sequence of instances of predefined modules - supersedes
    method XTransliteration::loadModule().

    @param aModType
        The predefined modules to load, given as a sequence of
        TransliterationModulesNew values.

    @param aLocale
        The locale for which the modules are requested.
 */
void loadModuleNew(
        [in] sequence<TransliterationModulesNew> aModType,
        [in] ::com::sun::star::lang::Locale aLocale );
/** Load instance of UNO registered module.

    <p> Each transliteration module is registered under a different
    service name. The convention for the service name is
    com.sun.star.i18n.Transliteration.l10n.{implName}. The
    {implName} is a unique name used to identify a module. The
    implName is used to get a localized name for the transliteration
    module. The implName is used in locale data to list the
    available transliteration modules for the locale. There are some
    transliteration modules that are always available. The names of
    those modules are listed as enum
    TransliterationModules names. For modules not
    listed there it is possible to load them directly by their
    implName. </p>

    @param aImplName
        The module's {implName} under which it is registered with
        com.sun.star.i18n.Transliteration.l10n.{implName}.

    @param aLocale
        The locale for which the module is requested.
 */
void loadModuleByImplName(
        [in] string aImplName,
        [in] ::com::sun::star::lang::Locale aLocale );
/** Load a sequence of instances of transliteration modules.
    Output of one module is fed as input to the next module in
    the sequence. The object created by this call has
    TransliterationType CASCADE and IGNORE types.

    @param aImplNameList
        Only IGNORE type modules can be specified.

    @param aLocale
        The locale for which the modules are requested.
 */
void loadModulesByImplNames(
        [in] sequence<string> aImplNameList,
        [in] ::com::sun::star::lang::Locale aLocale );
/** List the available transliteration modules for a given locale.
    It can be filtered based on its type.

    @param nType
        A bitmask field of values defined in TransliterationType.
        NOTE(review): this parameter was absent from the declaration as
        found (the parameter list ended on a dangling comma); confirm
        its name and type against the published interface.

    @param aLocale
        The locale for which the modules are requested.
 */
sequence<string> getAvailableModules(
        [in] ::com::sun::star::lang::Locale aLocale,
        [in] short nType );
/** Transliterate a substring. This method can be called if the
    object doesn't have TransliterationType IGNORE attribute.

    @param aInStr
        String containing the substring to be transliterated.

    @param nStartPos
        Start position within aInStr from where transliteration starts.

    @param nCount
        Number of code points to be transliterated.

    @param rOffset
        To find the grapheme of input string corresponding to the
        grapheme of output string, rOffset provides the offset array
        whose index is the offset of output string, the element
        containing the position within the input string before
        transliteration.

    @returns
        The transliterated string.
 */
string transliterate(
        [in] string aInStr, [in] long nStartPos, [in] long nCount,
        [out] sequence<long> rOffset );
/** For internal use, this method is supported to get the
    "transliteration", which equals() is based on.

    <p> Takes the same parameter list as transliterate(). </p>
 */
string folding(
        [in] string aInStr, [in] long nStartPos, [in] long nCount,
        [out] sequence<long> rOffset );
/** Match two substrings and find if they are equivalent as per this
    transliteration.

    <p> This method can be called if the object has
    TransliterationType IGNORE attribute. </p>

    <p> Returns the number of matched code points in any case, even if
    strings are not equal, for example: <br>
    equals( "a", 0, 1, nMatch1, "aaa", 0, 3, nMatch2 ) <br>
    returns `FALSE` and nMatch1:=1 and nMatch2:=1 <br>
    equals( "aab", 0, 3, nMatch1, "aaa", 0, 3, nMatch2 ) <br>
    returns `FALSE` and nMatch1:=2 and nMatch2:=2 <br> </p>

    @param aStr1
        First string to match.

    @param nPos1
        Start position within aStr1.

    @param nCount1
        Number of code points to use of aStr1.

    @param rMatch1
        Returns number of matched code points in aStr1.

    @param aStr2
        Second string to match.

    @param nPos2
        Start position within aStr2.

    @param nCount2
        Number of code points to use of aStr2.

    @param rMatch2
        Returns number of matched code points in aStr2.

    @returns
        `TRUE` if the substrings are equal per this transliteration,
        `FALSE` otherwise.
 */
boolean equals(
        [in] string aStr1, [in] long nPos1, [in] long nCount1,
        [out] long rMatch1,
        [in] string aStr2, [in] long nPos2, [in] long nCount2,
        [out] long rMatch2 );
/** Transliterate one set of characters to another.

    <p> This method is intended for getting corresponding ranges and
    can be called if the object has TransliterationType
    IGNORE attribute. </p>

    <p> For example: generic CASE_IGNORE transliterateRange( "a", "i" )
    returns {"A","I","a","i"}, transliterateRange( "a", "a" )
    returns {"A","A","a","a"}. </p>

    <p> Use this transliteration to create regular expressions like
    [a-i] --> [A-Ia-i]. </p>

    @param aStr1
        First character of the range, as "a" in the examples above.

    @param aStr2
        Last character of the range, as "i" in the examples above.

    @returns
        String sequence containing corresponding transliterated
        pairs of characters to represent a range.
 */
sequence<string> transliterateRange(
        [in] string aStr1,
        [in] string aStr2 );
/** Compare 2 substrings as per this transliteration. It translates both
    substrings before comparing them.

    @param aStr1
        String containing the first substring.

    @param nOff1
        Offset (from 0) of the first substring.

    @param nLen1
        Length (from offset) of the first substring.

    @param aStr2
        String containing the second substring.

    @param nOff2
        Offset (from 0) of the second substring.

    @param nLen2
        Length (from offset) of the second substring.

    @returns
        1 if the first substring is greater than the second substring <br>
        0 if the first substring is equal to the second substring <br>
        -1 if the first substring is less than the second substring
 */
long compareSubstring(
        [in] string aStr1, [in] long nOff1, [in] long nLen1,
        [in] string aStr2, [in] long nOff2, [in] long nLen2 );
/** Compare 2 strings as per this transliteration. It translates both
    strings before comparing them.

    @param aStr1
        First string.

    @param aStr2
        Second string.

    @returns
        1 if the first string is greater than the second string <br>
        0 if the first string is equal to the second string <br>
        -1 if the first string is less than the second string
 */
long compareString(
        [in] string aStr1,
        [in] string aStr2 );
315 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */