Version 5.2.6.1, tag libreoffice-5.2.6.1
[LibreOffice.git] / offapi / com / sun / star / i18n / XCharacterClassification.idl
blob9540f7c440764527ac7b8d4c879180eac01f241d
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #ifndef __com_sun_star_i18n_XCharacterClassification_idl__
21 #define __com_sun_star_i18n_XCharacterClassification_idl__
23 #include <com/sun/star/i18n/ParseResult.idl>
24 #include <com/sun/star/lang/Locale.idl>
25 #include <com/sun/star/uno/XInterface.idl>
28 module com { module sun { module star { module i18n {
33 Possible tokens to be parsed with parse...Token():
35 UPASCALPHA=[A-Z]
36 LOASCALPHA=[a-z]
37 ASCALPHA=1*(UPASCALPHA|LOASCALPHA)
38 ASCDIGIT=[0-9]
39 ASC_UNDERSCORE='_'
40 ASC_SPACE=' '
41 ASC_HT='\x09'
42 ASC_VT='\x0b'
43 ASC_WS=ASC_SPACE|ASC_HT|ASC_VT
44 ASC_DBL_QUOTE=\"
45 ASC_QUOTE=\'
46 UPASC_IDENTIFIER=UPASCALPHA *(UPASCALPHA|ASCDIGIT|ASC_UNDERSCORE)
48 ALPHA,DIGIT are the tokens which return true for isAlpha and isDigit
49 ALNUM=ALPHA|DIGIT
50 CHAR=anycharacter
51 WS=isWhiteSpace()
52 SIGN='+'|'-'
53 DECSEP=<locale dependent decimal separator>
54 GRPSEP=<locale dependent thousand separator>
55 EXPONENT=(E|e)[SIGN]1*ASCDIGIT
57 IDENTIFIER=ALPHA *ALNUM
58 UIDENTIFIER=(ALPHA | ASC_UNDERSCORE) *(ALNUM|ASC_UNDERSCORE)
59 ALPHA_NAME=ALPHA *(ALNUM|DEFCHARS)
60 ANY_NAME=1*(ALNUM|DEFCHARS)
61 SINGLE_QUOTE_NAME=ASC_QUOTE(1*CHAR)ASC_QUOTE
62 DOUBLE_QUOTE_NAME=ASC_DBL_QUOTE(*CHAR)ASC_DBL_QUOTE
63 ASC_NUMBER=[SIGN]*(1*ASCDIGIT *(GRPSEP 1*ASCDIGIT))[DECSEP]1*ASCDIGIT[EXPONENT]
64 NUMBER=[SIGN]*(1*DIGIT *(GRPSEP 1*DIGIT))[DECSEP]1*DIGIT[EXPONENT]
69 /**
70 Character classification (upper, lower, digit, letter, number, ...)
71 and a generic Unicode-enabled parser.
74 published interface XCharacterClassification : com::sun::star::uno::XInterface
76 /** Convert lower case alphabetic characters of <em>aText</em> to upper case,
77 starting at position <em>nPos</em> for <em>nCount</em> code points.
@param aText
Text containing the characters to convert.
@param nPos
Position in <em>aText</em> at which the conversion starts.
@param nCount
Number of code points to convert.
@param aLocale
Locale supplied for the conversion; presumably selects
locale-specific case mapping rules (to be confirmed against the
implementation).
@returns
The result of the conversion as a string.
79 string toUpper( [in] string aText, [in] long nPos, [in] long nCount,
80 [in] com::sun::star::lang::Locale aLocale );
82 /** Convert upper case alphabetic characters of <em>aText</em> to lower case,
83 starting at position <em>nPos</em> for <em>nCount</em> code points.
@param aText
Text containing the characters to convert.
@param nPos
Position in <em>aText</em> at which the conversion starts.
@param nCount
Number of code points to convert.
@param aLocale
Locale supplied for the conversion; presumably selects
locale-specific case mapping rules (to be confirmed against the
implementation).
@returns
The result of the conversion as a string.
85 string toLower( [in] string aText, [in] long nPos, [in] long nCount,
86 [in] com::sun::star::lang::Locale aLocale );
88 /** Convert characters of <em>aText</em> to title case, starting at
89 position <em>nPos</em> for <em>nCount</em> code points.
@param aText
Text containing the characters to convert.
@param nPos
Position in <em>aText</em> at which the conversion starts.
@param nCount
Number of code points to convert.
@param aLocale
Locale supplied for the conversion; presumably selects
locale-specific case mapping rules (to be confirmed against the
implementation).
@returns
The result of the conversion as a string.
91 string toTitle( [in] string aText, [in] long nPos, [in] long nCount,
92 [in] com::sun::star::lang::Locale aLocale );
94 /// Get UnicodeType of the character at position <em>nPos</em> of <em>aText</em>.
/// @returns The UnicodeType value of that character, as a short.
95 short getType( [in] string aText, [in] long nPos );
97 /** Get DirectionProperty of the character at position
98 <em>nPos</em> of <em>aText</em>.
@returns
The DirectionProperty value of that character, as a short.
100 short getCharacterDirection( [in] string aText, [in] long nPos );
102 /// Get UnicodeScript of the character at position <em>nPos</em> of <em>aText</em>.
/// @returns The UnicodeScript value of that character, as a short.
103 short getScript( [in] string aText, [in] long nPos );
105 /// Get KCharacterType of the character at position <em>nPos</em> of <em>aText</em>.
/// <em>aLocale</em> is presumably used for locale-dependent character type
/// determination (to be confirmed against the implementation).
106 long getCharacterType( [in] string aText, [in] long nPos,
107 [in] com::sun::star::lang::Locale aLocale );
109 /** Get the accumulated KCharacterType flags of the part of <em>aText</em>
110 starting at position <em>nPos</em> and spanning <em>nCount</em> code points.
112 @returns
113 A number with appropriate flags set to indicate what type of
114 characters the string contains, each flag value being one of the
115 KCharacterType values.
117 long getStringType( [in] string aText, [in] long nPos, [in] long nCount,
118 [in] com::sun::star::lang::Locale aLocale );
122 Parse a string for a token starting at position <em>nPos</em>.
124 <p> A name or identifier must match the
125 KParseTokens criteria passed in
126 <em>nStartCharFlags</em> and <em>nContCharFlags</em> and may
127 additionally contain characters of
128 <em>aUserDefinedCharactersStart</em> and/or
129 <em>aUserDefinedCharactersCont</em>. </p>
132 @returns
133 A filled ParseResult structure. If no
134 unambiguous token could be parsed,
135 ParseResult::TokenType will be set to
136 <b>0</b> (zero), other fields will contain the values parsed
137 so far.
139 <p> If a token may represent either a numeric value or a
140 name according to the passed Start/Cont-Flags/Chars, both
141 KParseType::ASC_NUMBER (or
142 KParseType::UNI_NUMBER) and
143 KParseType::IDENTNAME are set in
144 ParseResult::TokenType. </p>
146 @param aText
147 Text to be parsed.
149 @param nPos
150 Position where parsing starts.
152 @param aLocale
153 The locale, for example, for decimal and group separator or
154 character type determination.
156 @param nStartCharFlags
157 A set of KParseTokens constants determining the
158 allowed characters a name or identifier may start with.
160 @param aUserDefinedCharactersStart
161 A set of additionally allowed characters a name or
162 identifier may start with.
164 @param nContCharFlags
165 A set of KParseTokens constants determining the
166 allowed characters a name or identifier may continue with.
168 @param aUserDefinedCharactersCont
169 A set of additionally allowed characters a name or
170 identifier may continue with.
172 @code{.cpp}
173 using namespace ::com::sun::star::i18n;
174 // First character of an identifier may be any alphabetic or underscore.
175 sal_Int32 nStartFlags = KParseTokens::ANY_ALPHA | KParseTokens::ASC_UNDERSCORE;
176 // Continuing characters may be any alphanumeric or underscore or dot.
177 sal_Int32 nContFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE | KParseTokens::ASC_DOT;
178 // No further characters assumed to be contained in an identifier
179 OUString aEmptyString;
180 // Parse any token.
181 ParseResult rRes = xCC->parseAnyToken( aText, nPos, aLocale,
182 nStartFlags, aEmptyString, nContFlags, aEmptyString );
183 // Get parsed token.
184 if ( rRes.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER) )
185 fValue = rRes.Value;
186 if ( rRes.TokenType & KParseType::IDENTNAME )
187 aName = aText.copy( nPos, rRes.EndPos - nPos );
188 else if ( rRes.TokenType & KParseType::SINGLE_QUOTE_NAME )
189 aName = rRes.DequotedNameOrString;
190 else if ( rRes.TokenType & KParseType::DOUBLE_QUOTE_STRING )
191 aString = rRes.DequotedNameOrString;
192 else if ( rRes.TokenType & KParseType::BOOLEAN )
193 aSymbol = aText.copy( nPos, rRes.EndPos - nPos );
194 else if ( rRes.TokenType & KParseType::ONE_SINGLE_CHAR )
195 aSymbol = aText.copy( nPos, rRes.EndPos - nPos );
196 @endcode
199 ParseResult parseAnyToken(
200 [in] string aText,
201 [in] long nPos,
202 [in] com::sun::star::lang::Locale aLocale,
203 [in] long nStartCharFlags,
204 [in] string aUserDefinedCharactersStart,
205 [in] long nContCharFlags,
206 [in] string aUserDefinedCharactersCont
210 Parse a string for a token of type <em>nTokenType</em> starting
211 at position <em>nPos</em>.
213 <p> Other parameters are the same as in
214 parseAnyToken(). If the actual token does not
215 match the passed <em>nTokenType</em>, a ParseResult with
216 ParseResult::TokenType set to <b>0</b> (zero)
217 is returned. </p>
219 @param nTokenType
220 One or more of the KParseType constants.
222 @param aText
223 See #parseAnyToken
224 @param nPos
225 See #parseAnyToken
226 @param aLocale
227 See #parseAnyToken
228 @param nStartCharFlags
229 See #parseAnyToken
230 @param aUserDefinedCharactersStart
231 See #parseAnyToken
232 @param nContCharFlags
233 See #parseAnyToken
234 @param aUserDefinedCharactersCont
235 See #parseAnyToken
237 @code{.cpp}
238 // Determine if a given name is a valid name (not quoted) and contains
239 // only allowed characters.
240 using namespace ::com::sun::star::i18n;
241 // First character of an identifier may be any alphanumeric or underscore.
242 sal_Int32 nStartFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE;
243 // No further characters assumed to be contained in an identifier start.
244 OUString aEmptyString;
245 // Continuing characters may be any alphanumeric or underscore.
246 sal_Int32 nContFlags = nStartFlags;
247 // Additionally, continuing characters may contain a blank.
248 OUString aContChars( RTL_CONSTASCII_USTRINGPARAM(" ") );
249 // Parse predefined (must be an IDENTNAME) token.
250 ParseResult rRes = xCC->parsePredefinedToken( KParseType::IDENTNAME, rName, 0, aLocale,
251 nStartFlags, aEmptyString, nContFlags, aContChars );
252 // Test if it is an identifier name and if it is the only token,
253 // i.e. nothing else follows it.
254 bValid = (rRes.TokenType & KParseType::IDENTNAME) && rRes.EndPos == rName.getLength();
255 @endcode
258 ParseResult parsePredefinedToken(
259 [in] long nTokenType,
260 [in] string aText,
261 [in] long nPos,
262 [in] com::sun::star::lang::Locale aLocale,
263 [in] long nStartCharFlags,
264 [in] string aUserDefinedCharactersStart,
265 [in] long nContCharFlags,
266 [in] string aUserDefinedCharactersCont
270 }; }; }; };
272 #endif
274 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */