offapi/com/sun/star/i18n/XCharacterClassification.idl

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /*************************************************************************
   3  *
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * Copyright 2000, 2010 Oracle and/or its affiliates.
   7  *
   8  * OpenOffice.org - a multi-platform office productivity suite
   9  *
  10  * This file is part of OpenOffice.org.
  11  *
  12  * OpenOffice.org is free software: you can redistribute it and/or modify
  13  * it under the terms of the GNU Lesser General Public License version 3
  14  * only, as published by the Free Software Foundation.
  15  *
  16  * OpenOffice.org is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19  * GNU Lesser General Public License version 3 for more details
  20  * (a copy is included in the LICENSE file that accompanied this code).
  21  *
  22  * You should have received a copy of the GNU Lesser General Public License
  23  * version 3 along with OpenOffice.org.  If not, see
  24  * <http://www.openoffice.org/license.html>
  25  * for a copy of the LGPLv3 License.
  26  *
  27  ************************************************************************/
  28
  29 #ifndef __com_sun_star_i18n_XCharacterClassification_idl__
  30 #define __com_sun_star_i18n_XCharacterClassification_idl__
  31
  32 #include <com/sun/star/i18n/ParseResult.idl>
  33 #include <com/sun/star/lang/Locale.idl>
  34 #include <com/sun/star/uno/XInterface.idl>
  35
  36 //============================================================================
  37
  38 module com { module sun { module star { module i18n {
  39
  40 //============================================================================
  41
  42 /*
  43
  44 Possible tokens to be parsed with parse...Token():
  45
  46 UPASCALPHA=[A-Z]
  47 LOASCALPHA=[a-z]
  48 ASCALPHA=1*(UPASCALPHA|LOASCALPHA)
  49 ASCDIGIT=[0-9]
  50 ASC_UNDERSCORE='_'
  51 ASC_SPACE=' '
  52 ASC_HT='\0x9'
  53 ASC_VT='\0xb'
  54 ASC_WS=ASC_SPACE|ASC_HT|ASC_VT
  55 ASC_DBL_QUOTE=\"
  56 ASC_QUOTE=\'
  57 UPASC_IDENTIFIER=UPASCALPHA *(UPASCALPHA|ASCDIGIT|ASC_UNDERSCORE)
  58
  59 ALPHA,DIGIT are the tokens which return true for isAlpha and isDigit
  60 ALNUM=ALPHA|DIGIT
  61 CHAR=anycharacter
  62 WS=isWhiteSpace()
  63 SIGN='+'|'-'
  64 DECSEP=<locale dependent decimal separator>
  65 GRPSEP=<locale dependent thousand separator>
  66 EXPONENT=(E|e)[SIGN]1*ASCDIGIT
  67
  68 IDENTIFIER=ALPHA *ALNUM
  69 UIDENTIFIER=(ALPHA | ASC_UNDERSCORE) *(ALNUM|ASC_UNDERSCORE)
  70 ALPHA_NAME=ALPHA *(ALNUM|DEFCHARS)
  71 ANY_NAME=1*(ALNUM|DEFCHARS)
  72 SINGLE_QUOTE_NAME=ASC_QUOTE(1*CHAR)ASC_QUOTE
  73 DOUBLE_QUOTE_NAME=ASC_DBL_QUOTE(*CHAR)ASC_DBL_QUOTE
  74 ASC_NUMBER=[SIGN]*(1*ASCDIGIT  *(GRPSEP 1*ASCDIGIT))[DECSEP]1*ASCDIGIT[EXPONENT]
  75 NUMBER=[SIGN]*(1*DIGIT  *(GRPSEP 1*DIGIT))[DECSEP]1*DIGIT[EXPONENT]
  76
  77 */
  78
  79 //============================================================================
  80
  81 /**
  82     Character classification (upper, lower, digit, letter, number, ...)
  83     and generic Unicode enabled parser.
  84  */
  85
  86 published interface XCharacterClassification : com::sun::star::uno::XInterface
  87 {
  88     //------------------------------------------------------------------------
  89     /** Convert lower case alpha to upper case alpha, starting at
  90         position <em>nPos</em> for <em>nCount</em> code points.
  91      */
  92     string   toUpper( [in] string aText, [in] long nPos, [in] long nCount,
  93                       [in] com::sun::star::lang::Locale aLocale );
  94
  95     //------------------------------------------------------------------------
  96     /** Convert upper case alpha to lower case alpha, starting at
  97         position <em>nPos</em> for <em>nCount</em> code points.
  98      */
  99     string   toLower( [in] string aText, [in] long nPos, [in] long nCount,
 100                       [in] com::sun::star::lang::Locale aLocale );
 101
 102     //------------------------------------------------------------------------
 103     /** Convert to title case, starting at
 104         position <em>nPos</em> for <em>nCount</em> code points.
 105      */
 106     string   toTitle( [in] string aText, [in] long nPos, [in] long nCount,
 107                       [in] com::sun::star::lang::Locale aLocale );
 108
 109     //------------------------------------------------------------------------
 110     /// Get <type>UnicodeType</type> of character at position <em>nPos</em>.
 111     short    getType( [in] string aText, [in] long nPos );
 112
 113     //------------------------------------------------------------------------
 114     /** Get <type>DirectionProperty</type> of character at position
 115         <em>nPos</em>.
 116      */
 117     short    getCharacterDirection( [in] string aText, [in] long nPos );
 118
 119     //------------------------------------------------------------------------
 120     /// Get <type>UnicodeScript</type> of character at position <em>nPos</em>.
 121     short    getScript( [in] string aText, [in] long nPos );
 122
 123     //------------------------------------------------------------------------
 124     /// Get <type>KCharacterType</type> of character at position <em>nPos</em>.
 125     long getCharacterType( [in] string aText, [in] long nPos,
 126                            [in] com::sun::star::lang::Locale aLocale );
 127
 128     //------------------------------------------------------------------------
 129     /** Get accumulated <type>KCharacterType</type>s of string starting
 130         at position <em>nPos</em> of length <em>nCount</em> code points.
 131
 132         @returns
 133             A number with appropriate flags set to indicate what type of
 134             characters the string contains, each flag value being one of
 135             KCharacterType values.
 136     */
 137     long getStringType( [in] string aText, [in] long nPos, [in] long nCount,
 138                         [in] com::sun::star::lang::Locale aLocale );
 139
 140
 141     //------------------------------------------------------------------------
 142     /**
 143         Parse a string for a token starting at position <em>nPos</em>.
 144
 145         <p> A name or identifier must match the
 146         <type>KParseTokens</type> criteria passed in
 147         <em>nStartCharFlags</em> and <em>nContCharFlags</em> and may
 148         additionally contain characters of
 149         <em>aUserDefinedCharactersStart</em> and/or
 150         <em>aUserDefinedCharactersCont</em>. </p>
 151
 152
 153         @returns
 154             A filled <type>ParseResult</type> structure. If no
 155             unambiguous token could be parsed,
 156             <member>ParseResult::TokenType</member> will be set to
 157             <b>0</b> (zero), other fields will contain the values parsed
 158             so far.
 159
 160             <p> If a token may represent either a numeric value or a
 161             name according to the passed Start/Cont-Flags/Chars, both
 162             <const>KParseType::ASC_NUMBER</const> (or
 163             <const>KParseType::UNI_NUMBER</const>) and
 164             <const>KParseType::IDENTNAME</const> are set in
 165             <member>ParseResult::TokenType</member>. </p>
 166
 167         @param  aText
 168             Text to be parsed.
 169
 170         @param  nPos
 171             Position where parsing starts.
 172
 173         @param  aLocale
 174             The locale, for example, for decimal and group separator or
 175             character type determination.
 176
 177         @param  nStartCharFlags
 178             A set of <type>KParseTokens</type> constants determining the
 179             allowed characters a name or identifier may start with.
 180
 181         @param  aUserDefinedCharactersStart
 182             A set of additionally allowed characters a name or
 183             identifier may start with.
 184
 185         @param  nContCharFlags
 186             A set of <type>KParseTokens</type> constants determining the
 187             allowed characters a name or identifier may continue with.
 188
 189         @param  aUserDefinedCharactersCont
 190             A set of additionally allowed characters a name or
 191             identifier may continue with.
 192
 193         @example:C++
 194         <listing>
 195             using namespace ::com::sun::star::i18n;
 196             // First character of an identifier may be any alphabetic or underscore.
 197             sal_Int32 nStartFlags = KParseTokens::ANY_ALPHA | KParseTokens::ASC_UNDERSCORE;
 198             // Continuing characters may be any alphanumeric or underscore or dot.
 199             sal_Int32 nContFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE | KParseTokens::ASC_DOT;
 200             // No further characters are assumed to be contained in an identifier.
 201             String aEmptyString;
 202             // Parse any token.
 203             ParseResult rRes = xCC->parseAnyToken( aText, nPos, aLocale,
 204                 nStartFlags, aEmptyString, nContFlags, aEmptyString );
 205             // Get parsed token.
 206             if ( rRes.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER) )
 207                 fValue = rRes.Value;
 208             if ( rRes.TokenType & KParseType::IDENTNAME )
 209                 aName = aText.Copy( nPos, rRes.EndPos - nPos );
 210             else if ( rRes.TokenType & KParseType::SINGLE_QUOTE_NAME )
 211                 aName = rRes.DequotedNameOrString;
 212             else if ( rRes.TokenType & KParseType::DOUBLE_QUOTE_STRING )
 213                 aString = rRes.DequotedNameOrString;
 214             else if ( rRes.TokenType & KParseType::BOOLEAN )
 215                 aSymbol = aText.Copy( nPos, rRes.EndPos - nPos );
 216             else if ( rRes.TokenType & KParseType::ONE_SINGLE_CHAR )
 217                 aSymbol = aText.Copy( nPos, rRes.EndPos - nPos );
 218         </listing>
 219      */
 220
 221     ParseResult parseAnyToken(
 222                             [in] string aText,
 223                             [in] long nPos,
 224                             [in] com::sun::star::lang::Locale aLocale,
 225                             [in] long nStartCharFlags,
 226                             [in] string aUserDefinedCharactersStart,
 227                             [in] long nContCharFlags,
 228                             [in] string aUserDefinedCharactersCont
 229                             );
 230
 231     //------------------------------------------------------------------------
 232     /**
 233         Parse a string for a token of type <em>nTokenType</em> starting
 234         at position <em>nPos</em>.
 235
 236         <p> Other parameters are the same as in
 237         <member>parseAnyToken</member>. If the actual token does not
 238         match the passed <em>nTokenType</em>, a
 239         <member>ParseResult::TokenType</member> set to <b>0</b> (zero)
 240         is returned. </p>
 241
 242         @param  nTokenType
 243             One or more of the <type>KParseType</type> constants.
 244
 245         @example:C++
 246         <listing>
 247             // Determine if a given name is a valid name (not quoted) and contains
 248             // only allowed characters.
 249             using namespace ::com::sun::star::i18n;
 250             // First character of an identifier may be any alphanumeric or underscore.
 251             sal_Int32 nStartFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE;
 252             // No further characters assumed to be contained in an identifier start.
 253             String aEmptyString;
 254             // Continuing characters may be any alphanumeric or underscore.
 255             sal_Int32 nContFlags = nStartFlags;
 256             // Additionally, continuing characters may contain a blank.
 257             String aContChars( RTL_CONSTASCII_USTRINGPARAM(" ") );
 258             // Parse predefined (must be an IDENTNAME) token.
 259             ParseResult rRes = xCC->parsePredefinedToken( KParseType::IDENTNAME, rName, 0, aLocale,
 260                 nStartFlags, aEmptyString, nContFlags, aContChars );
 261             // Test if it is an identifier name and if it is the only token,
 262             // i.e. the parsed token ends exactly at the end of the name.
 263             bValid = (rRes.TokenType & KParseType::IDENTNAME) && rRes.EndPos == rName.Len();
 264         </listing>
 265      */
 266
 267     ParseResult parsePredefinedToken(
 268                             [in] long nTokenType,
 269                             [in] string aText,
 270                             [in] long nPos,
 271                             [in] com::sun::star::lang::Locale aLocale,
 272                             [in] long nStartCharFlags,
 273                             [in] string aUserDefinedCharactersStart,
 274                             [in] long nContCharFlags,
 275                             [in] string aUserDefinedCharactersCont
 276                             );
 277 };
 278
 279 //=============================================================================
 280 }; }; }; };
 281
 282 #endif
 283
 284 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */