offapi/com/sun/star/i18n/XCharacterClassification.idl

   1 /*************************************************************************
   2  *
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * Copyright 2008 by Sun Microsystems, Inc.
   6  *
   7  * OpenOffice.org - a multi-platform office productivity suite
   8  *
   9  * $RCSfile: XCharacterClassification.idl,v $
  10  * $Revision: 1.15 $
  11  *
  12  * This file is part of OpenOffice.org.
  13  *
  14  * OpenOffice.org is free software: you can redistribute it and/or modify
  15  * it under the terms of the GNU Lesser General Public License version 3
  16  * only, as published by the Free Software Foundation.
  17  *
  18  * OpenOffice.org is distributed in the hope that it will be useful,
  19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  21  * GNU Lesser General Public License version 3 for more details
  22  * (a copy is included in the LICENSE file that accompanied this code).
  23  *
  24  * You should have received a copy of the GNU Lesser General Public License
  25  * version 3 along with OpenOffice.org.  If not, see
  26  * <http://www.openoffice.org/license.html>
  27  * for a copy of the LGPLv3 License.
  28  *
  29  ************************************************************************/
  30
  31 #ifndef __com_sun_star_i18n_XCharacterClassification_idl__
  32 #define __com_sun_star_i18n_XCharacterClassification_idl__
  33
  34 #include <com/sun/star/i18n/ParseResult.idl>
  35
  36 #ifndef __com_sun_star_lang_Locale_idl__
  37 #include <com/sun/star/lang/Locale.idl>
  38 #endif
  39 #ifndef __com_sun_star_uno_XInterface_idl__
  40 #include <com/sun/star/uno/XInterface.idl>
  41 #endif
  42
  43 //============================================================================
  44
  45 module com { module sun { module star { module i18n {
  46
  47 //============================================================================
  48
  49 /*
  50
  51 Possible tokens to be parsed with  parse...Token():
  52
  53 UPASCALPHA=[A-Z]
  54 LOASCALPHA=[a-z]
  55 ASCALPHA=1*(UPASCALPHA|LOASCALPHA)
  56 ASCDIGIT=[0-9]
  57 ASC_UNDERSCORE='_'
  58 ASC_SPACE=' '
  59 ASC_HT='\x09'
  60 ASC_VT='\x0b'
  61 ASC_WS=ASC_SPACE|ASC_HT|ASC_VT
  62 ASC_DBL_QUOTE=\"
  63 ASC_QUOTE=\'
  64 UPASC_IDENTIFIER=UPASCALPHA *(UPASCALPHA|ASCDIGIT|ASC_UNDERSCORE)
  65
  66 ALPHA,DIGIT are the tokens which return true for isAlpha and isDigit
  67 ALNUM=ALPHA|DIGIT
  68 CHAR=anycharacter
  69 WS=isWhiteSpace()
  70 SIGN='+'|'-'
  71 DECSEP=<locale dependent decimal separator>
  72 GRPSEP=<locale dependent thousand separator>
  73 EXPONENT=(E|e)[SIGN]1*ASCDIGIT
  74
  75 IDENTIFIER=ALPHA *ALNUM
  76 UIDENTIFIER=(ALPHA | ASC_UNDERSCORE) *(ALNUM|ASC_UNDERSCORE)
  77 ALPHA_NAME=ALPHA *(ALNUM|DEFCHARS)
  78 ANY_NAME=1*(ALNUM|DEFCHARS)
  79 SINGLE_QUOTE_NAME=ASC_QUOTE(1*CHAR)ASC_QUOTE
  80 DOUBLE_QUOTE_NAME=ASC_DBL_QUOTE(*CHAR)ASC_DBL_QUOTE
  81 ASC_NUMBER=[SIGN]*(1*ASCDIGIT  *(GRPSEP 1*ASCDIGIT))[DECSEP]1*ASCDIGIT[EXPONENT]
  82 NUMBER=[SIGN]*(1*DIGIT  *(GRPSEP 1*DIGIT))[DECSEP]1*DIGIT[EXPONENT]
  83
  84 */
  85
  86 //============================================================================
  87
  88 /**
  89     Character classification (upper, lower, digit, letter, number, ...)
  90     and generic Unicode-enabled parser.
  91  */
  92
  93 published interface XCharacterClassification : com::sun::star::uno::XInterface
  94 {
  95     //------------------------------------------------------------------------
  96     /** Convert lower case alpha to upper case alpha, starting at
  97         position <em>nPos</em> for <em>nCount</em> code points.
  98      */
  99     string   toUpper( [in] string aText, [in] long nPos, [in] long nCount,
 100                       [in] com::sun::star::lang::Locale aLocale );
 101
 102     //------------------------------------------------------------------------
 103     /** Convert upper case alpha to lower case alpha, starting at
 104         position <em>nPos</em> for <em>nCount</em> code points.
 105      */
 106     string   toLower( [in] string aText, [in] long nPos, [in] long nCount,
 107                       [in] com::sun::star::lang::Locale aLocale );
 108
 109     //------------------------------------------------------------------------
 110     /** Convert to title case, starting at
 111         position <em>nPos</em> for <em>nCount</em> code points.
 112      */
 113     string   toTitle( [in] string aText, [in] long nPos, [in] long nCount,
 114                       [in] com::sun::star::lang::Locale aLocale );
 115
 116     //------------------------------------------------------------------------
 117     /// Get <type>UnicodeType</type> of character at position <em>nPos</em>.
 118     short    getType( [in] string aText, [in] long nPos );
 119
 120     //------------------------------------------------------------------------
 121     /** Get <type>DirectionProperty</type> of character at position
 122         <em>nPos</em>.
 123      */
 124     short    getCharacterDirection( [in] string aText, [in] long nPos );
 125
 126     //------------------------------------------------------------------------
 127     /// Get <type>UnicodeScript</type> of character at position <em>nPos</em>.
 128     short    getScript( [in] string aText, [in] long nPos );
 129
 130     //------------------------------------------------------------------------
 131     /// Get <type>KCharacterType</type> of character at position <em>nPos</em>.
 132     long getCharacterType( [in] string aText, [in] long nPos,
 133                            [in] com::sun::star::lang::Locale aLocale );
 134
 135     //------------------------------------------------------------------------
 136     /** Get accumulated <type>KCharacterType</type>s of string starting
 137         at position <em>nPos</em> of length <em>nCount</em> code points.
 138
 139         @returns
 140             A number with appropriate flags set to indicate what type of
 141             characters the string contains, each flag value being one of
 142             KCharacterType values.
 143     */
 144     long getStringType( [in] string aText, [in] long nPos, [in] long nCount,
 145                         [in] com::sun::star::lang::Locale aLocale );
 146
 147
 148     //------------------------------------------------------------------------
 149     /**
 150         Parse a string for a token starting at position <em>nPos</em>.
 151
 152         <p> A name or identifier must match the
 153         <type>KParseTokens</type> criteria passed in
 154         <em>nStartCharFlags</em> and <em>nContCharFlags</em> and may
 155         additionally contain characters of
 156         <em>aUserDefinedCharactersStart</em> and/or
 157         <em>aUserDefinedCharactersCont</em>. </p>
 158
 159
 160         @returns
 161             A filled <type>ParseResult</type> structure. If no
 162             unambiguous token could be parsed,
 163             <member>ParseResult::TokenType</member> will be set to
 164             <b>0</b> (zero), other fields will contain the values parsed
 165             so far.
 166
 167             <p> If a token may represent either a numeric value or a
 168             name according to the passed Start/Cont-Flags/Chars, both
 169             <const>KParseType::ASC_NUMBER</const> (or
 170             <const>KParseType::UNI_NUMBER</const>) and
 171             <const>KParseType::IDENTNAME</const> are set in
 172             <member>ParseResult::TokenType</member>. </p>
 173
 174         @param  aText
 175             Text to be parsed.
 176
 177         @param  nPos
 178             Position where parsing starts.
 179
 180         @param  aLocale
 181             The locale, for example, for decimal and group separator or
 182             character type determination.
 183
 184         @param  nStartCharFlags
 185             A set of <type>KParseTokens</type> constants determining the
 186             allowed characters a name or identifier may start with.
 187
 188         @param  aUserDefinedCharactersStart
 189             A set of additionally allowed characters a name or
 190             identifier may start with.
 191
 192         @param  nContCharFlags
 193             A set of <type>KParseTokens</type> constants determining the
 194             allowed characters a name or identifier may continue with.
 195
 196         @param  aUserDefinedCharactersCont
 197             A set of additionally allowed characters a name or
 198             identifier may continue with.
 199
 200         @example:C++
 201         <listing>
 202             using namespace ::com::sun::star::i18n;
 203             // First character of an identifier may be any alphabetic or underscore.
 204             sal_Int32 nStartFlags = KParseTokens::ANY_ALPHA | KParseTokens::ASC_UNDERSCORE;
 205             // Continuing characters may be any alphanumeric or underscore or dot.
 206             sal_Int32 nContFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE | KParseTokens::ASC_DOT;
 207             // No further characters assumed to be contained in an identifier.
 208             String aEmptyString;
 209             // Parse any token.
 210             ParseResult rRes = xCC->parseAnyToken( aText, nPos, aLocale,
 211                 nStartFlags, aEmptyString, nContFlags, aEmptyString );
 212             // Get parsed token.
 213             if ( rRes.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER) )
 214                 fValue = rRes.Value;
 215             if ( rRes.TokenType & KParseType::IDENTNAME )
 216                 aName = aText.Copy( nPos, rRes.EndPos - nPos );
 217             else if ( rRes.TokenType & KParseType::SINGLE_QUOTE_NAME )
 218                 aName = rRes.DequotedNameOrString;
 219             else if ( rRes.TokenType & KParseType::DOUBLE_QUOTE_STRING )
 220                 aString = rRes.DequotedNameOrString;
 221             else if ( rRes.TokenType & KParseType::BOOLEAN )
 222                 aSymbol = aText.Copy( nPos, rRes.EndPos - nPos );
 223             else if ( rRes.TokenType & KParseType::ONE_SINGLE_CHAR )
 224                 aSymbol = aText.Copy( nPos, rRes.EndPos - nPos );
 225         </listing>
 226      */
 227
 228     ParseResult parseAnyToken(
 229                             [in] string aText,
 230                             [in] long nPos,
 231                             [in] com::sun::star::lang::Locale aLocale,
 232                             [in] long nStartCharFlags,
 233                             [in] string aUserDefinedCharactersStart,
 234                             [in] long nContCharFlags,
 235                             [in] string aUserDefinedCharactersCont
 236                             );
 237
 238     //------------------------------------------------------------------------
 239     /**
 240         Parse a string for a token of type <em>nTokenType</em> starting
 241         at position <em>nPos</em>.
 242
 243         <p> Other parameters are the same as in
 244         <member>parseAnyToken</member>. If the actual token does not
 245         match the passed <em>nTokenType</em> a
 246         <member>ParseResult::TokenType</member> set to <b>0</b> (zero)
 247         is returned. </p>
 248
 249         @param  nTokenType
 250             One or more of the <type>KParseType</type> constants.
 251
 252         @example:C++
 253         <listing>
 254             // Determine if a given name is a valid name (not quoted) and contains
 255             // only allowed characters.
 256             using namespace ::com::sun::star::i18n;
 257             // First character of an identifier may be any alphanumeric or underscore.
 258             sal_Int32 nStartFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE;
 259             // No further characters assumed to be contained in an identifier start.
 260             String aEmptyString;
 261             // Continuing characters may be any alphanumeric or underscore.
 262             sal_Int32 nContFlags = nStartFlags;
 263             // Additionally, continuing characters may contain a blank.
 264             String aContChars( RTL_CONSTASCII_USTRINGPARAM(" ") );
 265             // Parse predefined (must be an IDENTNAME) token.
 266             ParseResult rRes = xCC->parsePredefinedToken( KParseType::IDENTNAME, rName, 0, aLocale,
 267                 nStartFlags, aEmptyString, nContFlags, aContChars );
 268             // Test that the token is an identifier name and that it spans
 269             // the whole string, i.e. nothing else follows it.
 270             bValid = (rRes.TokenType & KParseType::IDENTNAME) && rRes.EndPos == rName.Len();
 271         </listing>
 272      */
 273
 274     ParseResult parsePredefinedToken(
 275                             [in] long nTokenType,
 276                             [in] string aText,
 277                             [in] long nPos,
 278                             [in] com::sun::star::lang::Locale aLocale,
 279                             [in] long nStartCharFlags,
 280                             [in] string aUserDefinedCharactersStart,
 281                             [in] long nContCharFlags,
 282                             [in] string aUserDefinedCharactersCont
 283                             );
 284 };
 285
 286 //=============================================================================
 287 }; }; }; };
 288
 289 #endif