1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #ifndef __com_sun_star_i18n_XCharacterClassification_idl__
21 #define __com_sun_star_i18n_XCharacterClassification_idl__
23 #include
<com
/sun
/star
/i18n
/ParseResult.idl
>
24 #include
<com
/sun
/star
/lang
/Locale.idl
>
25 #include
<com
/sun
/star
/uno
/XInterface.idl
>
// Opens the nested module scope com::sun::star::i18n.
28 module com
{ module sun
{ module star
{ module i18n
{
33 Possible tokens to be parsed with parse...Token():
37 ASCALPHA=1*(UPASCALPHA|LOASCALPHA)
43 ASC_WS=ASC_SPACE|ASC_HT|ASC_VT
46 UPASC_IDENTIFIER=UPASCALPHA *(UPASCALPHA|ASCDIGIT|ASC_UNDERSCORE)
48 ALPHA,DIGIT are the tokens which return true for isAlpha and isDigit
53 DECSEP=<locale dependent decimal separator>
54 GRPSEP=<locale dependent thousand separator>
55 EXPONENT=(E|e)[SIGN]1*ASC_DIGIT
57 IDENTIFIER=ALPHA *ALNUM
58 UIDENTIFIER=(ALPHA | ASC_UNDERSCORE) *(ALNUM|ASC_UNDERSCORE)
59 ALPHA_NAME=ALPHA *(ALNUM|DEFCHARS)
60 ANY_NAME=1*(ALNUM|DEFCHARS)
61 SINGLE_QUOTE_NAME=ASC_QUOTE(1*CHAR)ASC_QUOTE
62 DOUBLE_QUOTE_NAME=ASC_DBL_QUOTE(*CHAR)ASC_DBL_QUOTE
63 ASC_NUMBER=[SIGN]*(1*ASC_DIGIT *(GRPSEP 1*ASC_DIGIT))[DECSEP]1*ASC_DIGIT[EXPONENT]
64 NUMBER=[SIGN]*(1*DIGIT *(GRPSEP 1*DIGIT))[DECSEP]1*DIGIT[EXPONENT]
70 Character classification (upper, lower, digit, letter, number, ...)
71 and generic Unicode enabled parser.
// Declares the published interface XCharacterClassification, which inherits
// from com::sun::star::uno::XInterface.
74 published
interface XCharacterClassification
: com
::sun
::star
::uno
::XInterface
76 /** Convert lower case alpha to upper case alpha, starting at
77 position <em>nPos</em> for <em>nCount</em> code points.
// toUpper: returns a string. All parameters are [in]:
//   aText   - the text to convert
//   nPos    - start position within aText
//   nCount  - number of code points to convert
//   aLocale - a com::sun::star::lang::Locale
79 string toUpper
( [in] string aText
, [in] long nPos
, [in] long nCount
,
80 [in] com
::sun
::star
::lang
::Locale aLocale
);
82 /** Convert upper case alpha to lower case alpha, starting at
83 position <em>nPos</em> for <em>nCount</em> code points.
// toLower: returns a string. All parameters are [in]:
//   aText   - the text to convert
//   nPos    - start position within aText
//   nCount  - number of code points to convert
//   aLocale - a com::sun::star::lang::Locale
85 string toLower
( [in] string aText
, [in] long nPos
, [in] long nCount
,
86 [in] com
::sun
::star
::lang
::Locale aLocale
);
88 /** Convert to title case, starting at
89 position <em>nPos</em> for <em>nCount</em> code points.
// toTitle: returns a string. All parameters are [in]:
//   aText   - the text to convert
//   nPos    - start position within aText
//   nCount  - number of code points to convert
//   aLocale - a com::sun::star::lang::Locale
91 string toTitle
( [in] string aText
, [in] long nPos
, [in] long nCount
,
92 [in] com
::sun
::star
::lang
::Locale aLocale
);
94 /// Get UnicodeType of character at position <em>nPos</em>.
// getType: returns a short holding the UnicodeType of the character
// at position nPos of aText. Both parameters are [in].
95 short getType
( [in] string aText
, [in] long nPos
);
97 /** Get DirectionProperty of character at position <em>nPos</em>.
// getCharacterDirection: returns a short holding the DirectionProperty
// of a character of aText. Both parameters are [in].
100 short getCharacterDirection
( [in] string aText
, [in] long nPos
);
102 /// Get UnicodeScript of character at position <em>nPos</em>.
// getScript: returns a short holding the UnicodeScript of the character
// at position nPos of aText. Both parameters are [in].
103 short getScript
( [in] string aText
, [in] long nPos
);
105 /// Get KCharacterType of character at position <em>nPos</em>.
// getCharacterType: returns a long holding the KCharacterType of the
// character at position nPos of aText. All parameters are [in];
// aLocale is a com::sun::star::lang::Locale.
106 long getCharacterType
( [in] string aText
, [in] long nPos
,
107 [in] com
::sun
::star
::lang
::Locale aLocale
);
109 /** Get accumulated KCharacterTypes of string starting
110 at position <em>nPos</em> of length <em>nCount</em> code points.
113 A number with appropriate flags set to indicate what type of
114 characters the string contains, each flag value being one of
115 KCharacterType values.
// getStringType: returns a long with flags accumulated from the
// KCharacterType values of the characters in the examined range.
// All parameters are [in]:
//   aText   - the text to examine
//   nPos    - start position within aText
//   nCount  - length of the range in code points
//   aLocale - a com::sun::star::lang::Locale
117 long getStringType
( [in] string aText
, [in] long nPos
, [in] long nCount
,
118 [in] com
::sun
::star
::lang
::Locale aLocale
);
122 Parse a string for a token starting at position <em>nPos</em>.
124 <p> A name or identifier must match the
125 KParseTokens criteria passed in
126 <em>nStartCharFlags</em> and <em>nContCharFlags</em> and may
127 additionally contain characters of
128 <em>aUserDefinedCharactersStart</em> and/or
129 <em>aUserDefinedCharactersCont</em>. </p>
133 A filled ParseResult structure. If no
134 unambiguous token could be parsed,
135 ParseResult::TokenType will be set to
136 <b>0</b> (zero), other fields will contain the values parsed so far.
139 <p> If a token may represent either a numeric value or a
140 name according to the passed Start/Cont-Flags/Chars, both
141 KParseType::ASC_NUM (or
142 KParseType::UNI_NUM) and
143 KParseType::IDENTNAME are set in
144 ParseResult::TokenType.
150 @param nPos Position where parsing starts.
153 @param aLocale The locale, for example, for decimal and group separator or
154 character type determination.
156 @param nStartCharFlags
157 A set of KParseTokens constants determining the
158 allowed characters a name or identifier may start with.
160 @param aUserDefinedCharactersStart
161 A set of additionally allowed characters a name or
162 identifier may start with.
164 @param nContCharFlags
165 A set of KParseTokens constants determining the
166 allowed characters a name or identifier may continue with.
168 @param aUserDefinedCharactersCont
169 A set of additionally allowed characters a name or
170 identifier may continue with.
173 using namespace ::com::sun::star::i18n;
174 // First character of an identifier may be any alphabetic or underscore.
175 sal_Int32 nStartFlags = KParseTokens::ANY_ALPHA | KParseTokens::ASC_UNDERSCORE;
176 // Continuing characters may be any alphanumeric or underscore or dot.
177 sal_Int32 nContFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE | KParseTokens::ASC_DOT;
178 // No further characters assumed to be contained in an identifier
179 OUString aEmptyString;
181 ParseResult rRes = xCC->parseAnyToken( aText, nPos, aLocale,
182 nStartFlags, aEmptyString, nContFlags, aEmptyString );
184 if ( rRes.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER) )
fValue = rRes.Value;
186 if ( rRes.TokenType & KParseType::IDENTNAME )
187 aName = aText.copy( nPos, rRes.EndPos - nPos );
188 else if ( rRes.TokenType & KParseType::SINGLE_QUOTE_NAME )
189 aName = rRes.DequotedNameOrString;
190 else if ( rRes.TokenType & KParseType::DOUBLE_QUOTE_STRING )
191 aString = rRes.DequotedNameOrString;
192 else if ( rRes.TokenType & KParseType::BOOLEAN )
193 aSymbol = aText.copy( nPos, rRes.EndPos - nPos );
194 else if ( rRes.TokenType & KParseType::ONE_SINGLE_CHAR )
195 aSymbol = aText.copy( nPos, rRes.EndPos - nPos );
// parseAnyToken: returns a ParseResult. The parameters visible here are
// all [in]: aLocale, nStartCharFlags, aUserDefinedCharactersStart,
// nContCharFlags and aUserDefinedCharactersCont.
// NOTE(review): the usage example calls
// parseAnyToken( aText, nPos, aLocale, ... ), so the leading
// [in] string aText and [in] long nPos parameters and the closing
// parenthesis appear to be missing from this listing - confirm against
// the upstream IDL.
199 ParseResult parseAnyToken
(
202 [in] com
::sun
::star
::lang
::Locale aLocale
,
203 [in] long nStartCharFlags
,
204 [in] string aUserDefinedCharactersStart
,
205 [in] long nContCharFlags
,
206 [in] string aUserDefinedCharactersCont
210 Parse a string for a token of type <em>nTokenType</em> starting
211 at position <em>nPos</em>.
213 <p> Other parameters are the same as in
214 parseAnyToken(). If the actual token does not
215 match the passed <em>nTokenType</em> a
216 ParseResult::TokenType set to <b>0</b> (zero) is returned.
220 One or more of the KParseType constants.
228 @param nStartCharFlags
230 @param aUserDefinedCharactersStart
232 @param nContCharFlags
234 @param aUserDefinedCharactersCont
238 // Determine if a given name is a valid name (not quoted) and contains
239 // only allowed characters.
240 using namespace ::com::sun::star::i18n;
241 // First character of an identifier may be any alphanumeric or underscore.
242 sal_Int32 nStartFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE;
243 // No further characters assumed to be contained in an identifier start.
244 OUString aEmptyString;
245 // Continuing characters may be any alphanumeric or underscore.
246 sal_Int32 nContFlags = nStartFlags;
247 // Additionally, continuing characters may contain a blank.
248 OUString aContChars( RTL_CONSTASCII_USTRINGPARAM(" ") );
249 // Parse predefined (must be an IDENTNAME) token.
250 ParseResult rRes = xCC->parsePredefinedToken( KParseType::IDENTNAME, rName, 0, aLocale,
251 nStartFlags, aEmptyString, nContFlags, aContChars );
252 // Test that the token is an identifier name and that
253 // nothing else follows it.
254 bValid = (rRes.TokenType & KParseType::IDENTNAME) && rRes.EndPos == rName.Len();
// parsePredefinedToken: returns a ParseResult. The parameters visible
// here are all [in]: nTokenType, aLocale, nStartCharFlags,
// aUserDefinedCharactersStart, nContCharFlags and
// aUserDefinedCharactersCont.
// NOTE(review): the usage example calls
// parsePredefinedToken( KParseType::IDENTNAME, rName, 0, aLocale, ... ),
// so a text parameter and a position parameter between nTokenType and
// aLocale, and the closing parenthesis, appear to be missing from this
// listing - confirm against the upstream IDL.
258 ParseResult parsePredefinedToken
(
259 [in] long nTokenType
,
262 [in] com
::sun
::star
::lang
::Locale aLocale
,
263 [in] long nStartCharFlags
,
264 [in] string aUserDefinedCharactersStart
,
265 [in] long nContCharFlags
,
266 [in] string aUserDefinedCharactersCont
274 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */