1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*************************************************************************
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * Copyright 2000, 2010 Oracle and/or its affiliates.
8 * OpenOffice.org - a multi-platform office productivity suite
10 * This file is part of OpenOffice.org.
12 * OpenOffice.org is free software: you can redistribute it and/or modify
13 * it under the terms of the GNU Lesser General Public License version 3
14 * only, as published by the Free Software Foundation.
16 * OpenOffice.org is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser General Public License version 3 for more details
20 * (a copy is included in the LICENSE file that accompanied this code).
22 * You should have received a copy of the GNU Lesser General Public License
23 * version 3 along with OpenOffice.org. If not, see
24 * <http://www.openoffice.org/license.html>
25 * for a copy of the LGPLv3 License.
27 ************************************************************************/
29 #ifndef __com_sun_star_i18n_XCharacterClassification_idl__
30 #define __com_sun_star_i18n_XCharacterClassification_idl__
32 #include
<com
/sun
/star
/i18n
/ParseResult.idl
>
33 #include
<com
/sun
/star
/lang
/Locale.idl
>
34 #include
<com
/sun
/star
/uno
/XInterface.idl
>
36 //============================================================================
38 module com
{ module sun
{ module star
{ module i18n
{
40 //============================================================================
44 Possible tokens to be parsed with parse...Token():
48 ASCALPHA=1*(UPASCALPHA|LOASCALPHA)
54 ASC_WS=ASC_SPACE|ASC_HT|ASC_VT
57 UPASC_IDENTIFIER=UPASCALPHA *(UPASCALPHA|ASCDIGIT|ASC_UNDERSCORE)
59 ALPHA and DIGIT are the tokens for which isAlpha and isDigit, respectively, return true
64 DECSEP=<locale dependent decimal separator>
65 GRPSEP=<locale dependent thousand separator>
66 EXPONENT=(E|e)[SIGN]1*ASC_DIGIT
68 IDENTIFIER=ALPHA *ALNUM
69 UIDENTIFIER=(ALPHA | ASC_UNDERSCORE) *(ALNUM|ASC_UNDERSCORE)
70 ALPHA_NAME=ALPHA *(ALNUM|DEFCHARS)
71 ANY_NAME=1*(ALNUM|DEFCHARS)
72 SINGLE_QUOTE_NAME=ASC_QUOTE(1*CHAR)ASC_QUOTE
73 DOUBLE_QUOTE_NAME=ASC_DBL_QUOTE(*CHAR)ASC_DBL_QUOTE
74 ASC_NUMBER=[SIGN]*(1*ASC_DIGIT *(GRPSEP 1*ASC_DIGIT))[DECSEP]1*ASC_DIGIT[EXPONENT]
75 NUMBER=[SIGN]*(1*DIGIT *(GRPSEP 1*DIGIT))[DECSEP]1*DIGIT[EXPONENT]
79 //============================================================================
82 Character classification (upper, lower, digit, letter, number, ...)
83 and a generic Unicode-enabled parser.
86 published
interface XCharacterClassification
: com
::sun
::star
::uno
::XInterface
88 //------------------------------------------------------------------------
/** Convert lower case alpha to upper case alpha, starting at
    position <em>nPos</em> for <em>nCount</em> code points.

    @param aText
        Text containing the range to convert.

    @param nPos
        Position in <em>aText</em> where the conversion starts.

    @param nCount
        Number of code points to convert.

    @param aLocale
        Locale to use for the conversion.

    @returns
        The result of the conversion as a string.
 */
string toUpper( [in] string aText,
                [in] long nPos,
                [in] long nCount,
                [in] com::sun::star::lang::Locale aLocale );
95 //------------------------------------------------------------------------
/** Convert upper case alpha to lower case alpha, starting at
    position <em>nPos</em> for <em>nCount</em> code points.

    @param aText
        Text containing the range to convert.

    @param nPos
        Position in <em>aText</em> where the conversion starts.

    @param nCount
        Number of code points to convert.

    @param aLocale
        Locale to use for the conversion.

    @returns
        The result of the conversion as a string.
 */
string toLower( [in] string aText,
                [in] long nPos,
                [in] long nCount,
                [in] com::sun::star::lang::Locale aLocale );
102 //------------------------------------------------------------------------
/** Convert to title case, starting at
    position <em>nPos</em> for <em>nCount</em> code points.

    @param aText
        Text containing the range to convert.

    @param nPos
        Position in <em>aText</em> where the conversion starts.

    @param nCount
        Number of code points to convert.

    @param aLocale
        Locale to use for the conversion.

    @returns
        The result of the conversion as a string.
 */
string toTitle( [in] string aText,
                [in] long nPos,
                [in] long nCount,
                [in] com::sun::star::lang::Locale aLocale );
109 //------------------------------------------------------------------------
/** Get <type>UnicodeType</type> of character at position <em>nPos</em>.

    @param aText
        Text containing the character to examine.

    @param nPos
        Position of the character in <em>aText</em>.

    @returns
        The <type>UnicodeType</type> of that character.
 */
short getType( [in] string aText, [in] long nPos );
113 //------------------------------------------------------------------------
/** Get <type>DirectionProperty</type> of character at position
    <em>nPos</em>.

    @param aText
        Text containing the character to examine.

    @param nPos
        Position of the character in <em>aText</em>.

    @returns
        The <type>DirectionProperty</type> of that character.
 */
short getCharacterDirection( [in] string aText, [in] long nPos );
119 //------------------------------------------------------------------------
/** Get <type>UnicodeScript</type> of character at position <em>nPos</em>.

    @param aText
        Text containing the character to examine.

    @param nPos
        Position of the character in <em>aText</em>.

    @returns
        The <type>UnicodeScript</type> of that character.
 */
short getScript( [in] string aText, [in] long nPos );
123 //------------------------------------------------------------------------
/** Get <type>KCharacterType</type> of character at position <em>nPos</em>.

    @param aText
        Text containing the character to examine.

    @param nPos
        Position of the character in <em>aText</em>.

    @param aLocale
        Locale to use for the character type determination.

    @returns
        The <type>KCharacterType</type> of that character.
 */
long getCharacterType( [in] string aText,
                       [in] long nPos,
                       [in] com::sun::star::lang::Locale aLocale );
128 //------------------------------------------------------------------------
/** Get accumulated <type>KCharacterType</type>s of string starting
    at position <em>nPos</em> of length <em>nCount</em> code points.

    @param aText
        Text containing the range to examine.

    @param nPos
        Position in <em>aText</em> where the range starts.

    @param nCount
        Length of the range in code points.

    @param aLocale
        Locale to use for the character type determination.

    @returns
        A number with appropriate flags set to indicate what type of
        characters the string contains, each flag value being one of
        the <type>KCharacterType</type> values.
 */
long getStringType( [in] string aText,
                    [in] long nPos,
                    [in] long nCount,
                    [in] com::sun::star::lang::Locale aLocale );
141 //------------------------------------------------------------------------
/**
    Parse a string for a token starting at position <em>nPos</em>.

    <p> A name or identifier must match the
    <type>KParseTokens</type> criteria passed in
    <em>nStartCharFlags</em> and <em>nContCharFlags</em> and may
    additionally contain characters of
    <em>aUserDefinedCharactersStart</em> and/or
    <em>aUserDefinedCharactersCont</em>. </p>

    @returns
        A filled <type>ParseResult</type> structure. If no
        unambiguous token could be parsed,
        <member>ParseResult::TokenType</member> will be set to
        <b>0</b> (zero), other fields will contain the values parsed
        so far.

        <p> If a token may represent either a numeric value or a
        name according to the passed Start/Cont-Flags/Chars, both
        <const>KParseType::ASC_NUMBER</const> (or
        <const>KParseType::UNI_NUMBER</const>) and
        <const>KParseType::IDENTNAME</const> are set in
        <member>ParseResult::TokenType</member>. </p>

    @param aText
        Text to be parsed.

    @param nPos
        Position where parsing starts.

    @param aLocale
        The locale, for example, for decimal and group separator or
        character type determination.

    @param nStartCharFlags
        A set of <type>KParseTokens</type> constants determining the
        allowed characters a name or identifier may start with.

    @param aUserDefinedCharactersStart
        A set of additionally allowed characters a name or
        identifier may start with.

    @param nContCharFlags
        A set of <type>KParseTokens</type> constants determining the
        allowed characters a name or identifier may continue with.

    @param aUserDefinedCharactersCont
        A set of additionally allowed characters a name or
        identifier may continue with.

    @example:C++
    <listing>
    using namespace ::com::sun::star::i18n;
    // First character of an identifier may be any alphabetic or underscore.
    sal_Int32 nStartFlags = KParseTokens::ANY_ALPHA | KParseTokens::ASC_UNDERSCORE;
    // Continuing characters may be any alphanumeric or underscore or dot.
    sal_Int32 nContFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE | KParseTokens::ASC_DOT;
    // No further characters assumed to be contained in an identifier
    String aEmptyString;
    // Parse any token.
    ParseResult rRes = xCC->parseAnyToken( aText, nPos, aLocale,
        nStartFlags, aEmptyString, nContFlags, aEmptyString );
    // Get parsed token.
    if ( rRes.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER) )
        fValue = rRes.Value;
    if ( rRes.TokenType & KParseType::IDENTNAME )
        aName = aText.Copy( nPos, rRes.EndPos - nPos );
    else if ( rRes.TokenType & KParseType::SINGLE_QUOTE_NAME )
        aName = rRes.DequotedNameOrString;
    else if ( rRes.TokenType & KParseType::DOUBLE_QUOTE_STRING )
        aString = rRes.DequotedNameOrString;
    else if ( rRes.TokenType & KParseType::BOOLEAN )
        aSymbol = aText.Copy( nPos, rRes.EndPos - nPos );
    else if ( rRes.TokenType & KParseType::ONE_SINGLE_CHAR )
        aSymbol = aText.Copy( nPos, rRes.EndPos - nPos );
    </listing>
 */
ParseResult parseAnyToken(
        [in] string aText,
        [in] long nPos,
        [in] com::sun::star::lang::Locale aLocale,
        [in] long nStartCharFlags,
        [in] string aUserDefinedCharactersStart,
        [in] long nContCharFlags,
        [in] string aUserDefinedCharactersCont );
231 //------------------------------------------------------------------------
/**
    Parse a string for a token of type <em>nTokenType</em> starting
    at position <em>nPos</em>.

    <p> Other parameters are the same as in
    <member>parseAnyToken</member>. If the actual token does not
    match the passed <em>nTokenType</em> a
    <member>ParseResult::TokenType</member> set to <b>0</b> (zero)
    is returned. </p>

    @param nTokenType
        One or more of the <type>KParseType</type> constants.

    @example:C++
    <listing>
    // Determine if a given name is a valid name (not quoted) and contains
    // only allowed characters.
    using namespace ::com::sun::star::i18n;
    // First character of an identifier may be any alphanumeric or underscore.
    sal_Int32 nStartFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE;
    // No further characters assumed to be contained in an identifier start.
    String aEmptyString;
    // Continuing characters may be any alphanumeric or underscore.
    sal_Int32 nContFlags = nStartFlags;
    // Additionally, continuing characters may contain a blank.
    String aContChars( RTL_CONSTASCII_USTRINGPARAM(" ") );
    // Parse predefined (must be an IDENTNAME) token.
    ParseResult rRes = xCC->parsePredefinedToken( KParseType::IDENTNAME, rName, 0, aLocale,
        nStartFlags, aEmptyString, nContFlags, aContChars );
    // Test whether the token is an identifier name and whether it spans
    // the whole string, i.e. nothing else follows it.
    bValid = (rRes.TokenType & KParseType::IDENTNAME) && rRes.EndPos == rName.Len();
    </listing>
 */
ParseResult parsePredefinedToken(
        [in] long nTokenType,
        [in] string aText,
        [in] long nPos,
        [in] com::sun::star::lang::Locale aLocale,
        [in] long nStartCharFlags,
        [in] string aUserDefinedCharactersStart,
        [in] long nContCharFlags,
        [in] string aUserDefinedCharactersCont );
279 //=============================================================================
284 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */