Update ooo320-m1
[ooovba.git] / offapi / com / sun / star / i18n / XCharacterClassification.idl
blob25a920ecbd85530c420c334aacabf4720f2ddac5
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: XCharacterClassification.idl,v $
10 * $Revision: 1.15 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 #ifndef __com_sun_star_i18n_XCharacterClassification_idl__
32 #define __com_sun_star_i18n_XCharacterClassification_idl__
34 #include <com/sun/star/i18n/ParseResult.idl>
36 #ifndef __com_sun_star_lang_Locale_idl__
37 #include <com/sun/star/lang/Locale.idl>
38 #endif
39 #ifndef __com_sun_star_uno_XInterface_idl__
40 #include <com/sun/star/uno/XInterface.idl>
41 #endif
43 //============================================================================
45 module com { module sun { module star { module i18n {
47 //============================================================================
51 Possible tokens to be parsed with parse...Token():
53 UPASCALPHA=[A-Z]
54 LOASCALPHA=[a-z]
55 ASCALPHA=1*(UPASCALPHA|LOASCALPHA)
56 ASCDIGIT=[0-9]
57 ASC_UNDERSCORE='_'
58 ASC_SPACE=' '
59 ASC_HT='\x09'
60 ASC_VT='\x0b'
61 ASC_WS=ASC_SPACE|ASC_HT|ASC_VT
62 ASC_DBL_QUOTE=\"
63 ASC_QUOTE=\'
64 UPASC_IDENTIFIER=UPASCALPHA *(UPASCALPHA|ASCDIGIT|ASC_UNDERSCORE)
66 ALPHA,DIGIT are the tokens which return true for isAlpha and isDigit
67 ALNUM=ALPHA|DIGIT
68 CHAR=<any character>
69 WS=isWhiteSpace()
70 SIGN='+'|'-'
71 DECSEP=<locale-dependent decimal separator>
72 GRPSEP=<locale-dependent thousands (group) separator>
73 EXPONENT=(E|e)[SIGN]1*ASCDIGIT
75 IDENTIFIER=ALPHA *ALNUM
76 UIDENTIFIER=(ALPHA | ASC_UNDERSCORE) *(ALNUM|ASC_UNDERSCORE)
77 ALPHA_NAME=ALPHA *(ALNUM|DEFCHARS)
78 ANY_NAME=1*(ALNUM|DEFCHARS)
79 SINGLE_QUOTE_NAME=ASC_QUOTE(1*CHAR)ASC_QUOTE
80 DOUBLE_QUOTE_NAME=ASC_DBL_QUOTE(*CHAR)ASC_DBL_QUOTE
81 ASC_NUMBER=[SIGN]*(1*ASCDIGIT *(GRPSEP 1*ASCDIGIT))[DECSEP]1*ASCDIGIT[EXPONENT]
82 NUMBER=[SIGN]*(1*DIGIT *(GRPSEP 1*DIGIT))[DECSEP]1*DIGIT[EXPONENT]
86 //============================================================================
88 /**
89 Character classification (upper, lower, digit, letter, number, ...)
90 and generic Unicode enabled parser.
93 published interface XCharacterClassification : com::sun::star::uno::XInterface
95 //------------------------------------------------------------------------
96 /** Convert lower case alpha to upper case alpha, starting at
97 position <em>nPos</em> for <em>nCount</em> code points.
99 string toUpper( [in] string aText, [in] long nPos, [in] long nCount,
100 [in] com::sun::star::lang::Locale aLocale );
102 //------------------------------------------------------------------------
103 /** Convert upper case alpha to lower case alpha, starting at
104 position <em>nPos</em> for <em>nCount</em> code points.
106 string toLower( [in] string aText, [in] long nPos, [in] long nCount,
107 [in] com::sun::star::lang::Locale aLocale );
109 //------------------------------------------------------------------------
110 /** Convert to title case, starting at
111 position <em>nPos</em> for <em>nCount</em> code points.
113 string toTitle( [in] string aText, [in] long nPos, [in] long nCount,
114 [in] com::sun::star::lang::Locale aLocale );
116 //------------------------------------------------------------------------
117 /// Get <type>UnicodeType</type> of character at position <em>nPos</em>.
118 short getType( [in] string aText, [in] long nPos );
120 //------------------------------------------------------------------------
121 /** Get <type>DirectionProperty</type> of character at position
122 <em>nPos</em>.
124 short getCharacterDirection( [in] string aText, [in] long nPos );
126 //------------------------------------------------------------------------
127 /// Get <type>UnicodeScript</type> of character at position <em>nPos</em>.
128 short getScript( [in] string aText, [in] long nPos );
130 //------------------------------------------------------------------------
131 /// Get <type>KCharacterType</type> of character at position <em>nPos</em>.
132 long getCharacterType( [in] string aText, [in] long nPos,
133 [in] com::sun::star::lang::Locale aLocale );
135 //------------------------------------------------------------------------
136 /** Get accumulated <type>KCharacterType</type>s of string starting
137 at position <em>nPos</em> of length <em>nCount</em> code points.
139 @returns
140 A number with appropriate flags set to indicate what type of
141 characters the string contains, each flag value being one of
142 KCharacterType values.
144 long getStringType( [in] string aText, [in] long nPos, [in] long nCount,
145 [in] com::sun::star::lang::Locale aLocale );
148 //------------------------------------------------------------------------
150 Parse a string for a token starting at position <em>nPos</em>.
152 <p> A name or identifier must match the
153 <type>KParseTokens</type> criteria passed in
154 <em>nStartCharFlags</em> and <em>nContCharFlags</em> and may
155 additionally contain characters of
156 <em>aUserDefinedCharactersStart</em> and/or
157 <em>aUserDefinedCharactersCont</em>. </p>
160 @returns
161 A filled <type>ParseResult</type> structure. If no
162 unambiguous token could be parsed,
163 <member>ParseResult::TokenType</member> will be set to
164 <b>0</b> (zero), other fields will contain the values parsed
165 so far.
167 <p> If a token may represent either a numeric value or a
168 name according to the passed Start/Cont-Flags/Chars, both
169 <const>KParseType::ASC_NUMBER</const> (or
170 <const>KParseType::UNI_NUMBER</const>) and
171 <const>KParseType::IDENTNAME</const> are set in
172 <member>ParseResult::TokenType</member>. </p>
174 @param aText
175 Text to be parsed.
177 @param nPos
178 Position where parsing starts.
180 @param aLocale
181 The locale, for example, for decimal and group separator or
182 character type determination.
184 @param nStartCharFlags
185 A set of <type>KParseTokens</type> constants determining the
186 allowed characters a name or identifier may start with.
188 @param aUserDefinedCharactersStart
189 A set of additionally allowed characters a name or
190 identifier may start with.
192 @param nContCharFlags
193 A set of <type>KParseTokens</type> constants determining the
194 allowed characters a name or identifier may continue with.
196 @param aUserDefinedCharactersCont
197 A set of additionally allowed characters a name or
198 identifier may continue with.
200 @example:C++
201 <listing>
202 using namespace ::com::sun::star::i18n;
203 // First character of an identifier may be any alphabetic or underscore.
204 sal_Int32 nStartFlags = KParseTokens::ANY_ALPHA | KParseTokens::ASC_UNDERSCORE;
205 // Continuing characters may be any alphanumeric or underscore or dot.
206 sal_Int32 nContFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE | KParseTokens::ASC_DOT;
207 // No further characters assumed to be contained in an identifier
208 String aEmptyString;
209 // Parse any token.
210 ParseResult rRes = xCC->parseAnyToken( aText, nPos, aLocale,
211 nStartFlags, aEmptyString, nContFlags, aEmptyString );
212 // Get parsed token.
213 if ( rRes.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER) )
214 fValue = rRes.Value;
215 if ( rRes.TokenType & KParseType::IDENTNAME )
216 aName = aText.Copy( nPos, rRes.EndPos - nPos );
217 else if ( rRes.TokenType & KParseType::SINGLE_QUOTE_NAME )
218 aName = rRes.DequotedNameOrString;
219 else if ( rRes.TokenType & KParseType::DOUBLE_QUOTE_STRING )
220 aString = rRes.DequotedNameOrString;
221 else if ( rRes.TokenType & KParseType::BOOLEAN )
222 aSymbol = aText.Copy( nPos, rRes.EndPos - nPos );
223 else if ( rRes.TokenType & KParseType::ONE_SINGLE_CHAR )
224 aSymbol = aText.Copy( nPos, rRes.EndPos - nPos );
225 </listing>
228 ParseResult parseAnyToken(
229 [in] string aText,
230 [in] long nPos,
231 [in] com::sun::star::lang::Locale aLocale,
232 [in] long nStartCharFlags,
233 [in] string aUserDefinedCharactersStart,
234 [in] long nContCharFlags,
235 [in] string aUserDefinedCharactersCont
238 //------------------------------------------------------------------------
240 Parse a string for a token of type <em>nTokenType</em> starting
241 at position <em>nPos</em>.
243 <p> Other parameters are the same as in
244 <member>parseAnyToken</member>. If the actual token does not
245 match the passed <em>nTokenType</em> a
246 <member>ParseResult::TokenType</member> set to <b>0</b> (zero)
247 is returned. </p>
249 @param nTokenType
250 One or more of the <type>KParseType</type> constants.
252 @example:C++
253 <listing>
254 // Determine if a given name is a valid name (not quoted) and contains
255 // only allowed characters.
256 using namespace ::com::sun::star::i18n;
257 // First character of an identifier may be any alphanumeric or underscore.
258 sal_Int32 nStartFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE;
259 // No further characters assumed to be contained in an identifier start.
260 String aEmptyString;
261 // Continuing characters may be any alphanumeric or underscore.
262 sal_Int32 nContFlags = nStartFlags;
263 // Additionally, continuing characters may contain a blank.
264 String aContChars( RTL_CONSTASCII_USTRINGPARAM(" ") );
265 // Parse predefined (must be an IDENTNAME) token.
266 ParseResult rRes = xCC->parsePredefinedToken( KParseType::IDENTNAME, rName, 0, aLocale,
267 nStartFlags, aEmptyString, nContFlags, aContChars );
268 // Test whether the token is an identifier name and whether it
269 // spans the whole string, i.e. nothing else follows it.
270 bValid = (rRes.TokenType & KParseType::IDENTNAME) && rRes.EndPos == rName.Len();
271 </listing>
274 ParseResult parsePredefinedToken(
275 [in] long nTokenType,
276 [in] string aText,
277 [in] long nPos,
278 [in] com::sun::star::lang::Locale aLocale,
279 [in] long nStartCharFlags,
280 [in] string aUserDefinedCharactersStart,
281 [in] long nContCharFlags,
282 [in] string aUserDefinedCharactersCont
286 //=============================================================================
287 }; }; }; };
289 #endif