offapi/com/sun/star/i18n/XCharacterClassification.idl

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /*
   3  * This file is part of the LibreOffice project.
   4  *
   5  * This Source Code Form is subject to the terms of the Mozilla Public
   6  * License, v. 2.0. If a copy of the MPL was not distributed with this
   7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
   8  *
   9  * This file incorporates work covered by the following license notice:
  10  *
  11  *   Licensed to the Apache Software Foundation (ASF) under one or more
  12  *   contributor license agreements. See the NOTICE file distributed
  13  *   with this work for additional information regarding copyright
  14  *   ownership. The ASF licenses this file to you under the Apache
  15  *   License, Version 2.0 (the "License"); you may not use this file
  16  *   except in compliance with the License. You may obtain a copy of
  17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
  18  */
  19
  20 #ifndef __com_sun_star_i18n_XCharacterClassification_idl__
  21 #define __com_sun_star_i18n_XCharacterClassification_idl__
  22
  23 #include <com/sun/star/i18n/ParseResult.idl>
  24 #include <com/sun/star/lang/Locale.idl>
  25 #include <com/sun/star/uno/XInterface.idl>
  26
  27
  28 module com { module sun { module star { module i18n {
  29
  30
  31 /*
  32
  33 Possible tokens to be parsed with  parse...Token():
  34
  35 UPASCALPHA=[A-Z]
  36 LOASCALPHA=[a-z]
  37 ASCALPHA=1*(UPASCALPHA|LOASCALPHA)
  38 ASCDIGIT=[0-9]
  39 ASC_UNDERSCORE='_'
  40 ASC_SPACE=' '
  41 ASC_HT='\x09'
  42 ASC_VT='\x0b'
  43 ASC_WS=ASC_SPACE|ASC_HT|ASC_VT
  44 ASC_DBL_QUOTE=\"
  45 ASC_QUOTE=\'
  46 UPASC_IDENTIFIER=UPASCALPHA *(UPASCALPHA|ASCDIGIT|ASC_UNDERSCORE)
  47
  48 ALPHA,DIGIT are the tokens which return true for isAlpha and isDigit
  49 ALNUM=ALPHA|DIGIT
  50 CHAR=anycharacter
  51 WS=isWhiteSpace()
  52 SIGN='+'|'-'
  53 DECSEP=<locale dependent decimal separator>
  54 GRPSEP=<locale dependent thousand separator>
  55 EXPONENT=(E|e)[SIGN]1*ASCDIGIT
  56
  57 IDENTIFIER=ALPHA *ALNUM
  58 UIDENTIFIER=(ALPHA | ASC_UNDERSCORE) *(ALNUM|ASC_UNDERSCORE)
  59 ALPHA_NAME=ALPHA *(ALNUM|DEFCHARS)
  60 ANY_NAME=1*(ALNUM|DEFCHARS)
  61 SINGLE_QUOTE_NAME=ASC_QUOTE(1*CHAR)ASC_QUOTE
  62 DOUBLE_QUOTE_NAME=ASC_DBL_QUOTE(*CHAR)ASC_DBL_QUOTE
  63 ASC_NUMBER=[SIGN]*(1*ASCDIGIT  *(GRPSEP 1*ASCDIGIT))[DECSEP]1*ASCDIGIT[EXPONENT]
  64 NUMBER=[SIGN]*(1*DIGIT  *(GRPSEP 1*DIGIT))[DECSEP]1*DIGIT[EXPONENT]
  65
  66 */
  67
  68
  69 /**
  70     Character classification (upper, lower, digit, letter, number, ...),
  71     case conversion, and a generic Unicode enabled token parser.
  72  */
  73
  74 published interface XCharacterClassification : com::sun::star::uno::XInterface
  75 {
  76     /** Convert lower case alpha to upper case alpha, starting at
  77         position <em>nPos</em> for <em>nCount</em> code points.
  78      */
  79     string   toUpper( [in] string aText, [in] long nPos, [in] long nCount,
  80                       [in] com::sun::star::lang::Locale aLocale );
  81
  82     /** Convert upper case alpha to lower case alpha, starting at
  83         position <em>nPos</em> for <em>nCount</em> code points.
  84      */
  85     string   toLower( [in] string aText, [in] long nPos, [in] long nCount,
  86                       [in] com::sun::star::lang::Locale aLocale );
  87
  88     /** Convert to title case, starting at
  89         position <em>nPos</em> for <em>nCount</em> code points.
  90      */
  91     string   toTitle( [in] string aText, [in] long nPos, [in] long nCount,
  92                       [in] com::sun::star::lang::Locale aLocale );
  93
  94     /// Get UnicodeType of the character at position <em>nPos</em> of <em>aText</em>.
  95     short    getType( [in] string aText, [in] long nPos );
  96
  97     /** Get DirectionProperty of the character at position
  98         <em>nPos</em> of <em>aText</em>.
  99      */
 100     short    getCharacterDirection( [in] string aText, [in] long nPos );
 101
 102     /// Get UnicodeScript of the character at position <em>nPos</em> of <em>aText</em>.
 103     short    getScript( [in] string aText, [in] long nPos );
 104
 105     /// Get KCharacterType of the character at position <em>nPos</em> of <em>aText</em>.
 106     long getCharacterType( [in] string aText, [in] long nPos,
 107                            [in] com::sun::star::lang::Locale aLocale );
 108
 109     /** Get accumulated KCharacterTypes of string starting
 110         at position <em>nPos</em> of length <em>nCount</em> code points.
 111
 112         @returns
 113             A number with appropriate flags set to indicate what type of
 114             characters the string contains, each flag value being one of
 115             KCharacterType values.
 116     */
 117     long getStringType( [in] string aText, [in] long nPos, [in] long nCount,
 118                         [in] com::sun::star::lang::Locale aLocale );
 119
 120
 121     /**
 122         Parse a string for a token starting at position <em>nPos</em>.
 123
 124         <p> A name or identifier must match the
 125         KParseTokens criteria passed in
 126         <em>nStartCharFlags</em> and <em>nContCharFlags</em> and may
 127         additionally contain characters of
 128         <em>aUserDefinedCharactersStart</em> and/or
 129         <em>aUserDefinedCharactersCont</em>. </p>
 130
 131
 132         @returns
 133             A filled ParseResult structure. If no
 134             unambiguous token could be parsed,
 135             ParseResult::TokenType will be set to
 136             <b>0</b> (zero), other fields will contain the values parsed
 137             so far.
 138
 139             <p> If a token may represent either a numeric value or a
 140             name according to the passed Start/Cont-Flags/Chars, both
 141             KParseType::ASC_NUMBER (or
 142             KParseType::UNI_NUMBER) and
 143             KParseType::IDENTNAME are set in
 144             ParseResult::TokenType. </p>
 145
 146         @param  aText
 147             Text to be parsed.
 148
 149         @param  nPos
 150             Position where parsing starts.
 151
 152         @param  aLocale
 153             The locale, for example, for decimal and group separator or
 154             character type determination.
 155
 156         @param  nStartCharFlags
 157             A set of KParseTokens constants determining the
 158             allowed characters a name or identifier may start with.
 159
 160         @param  aUserDefinedCharactersStart
 161             A set of additionally allowed characters a name or
 162             identifier may start with.
 163
 164         @param  nContCharFlags
 165             A set of KParseTokens constants determining the
 166             allowed characters a name or identifier may continue with.
 167
 168         @param  aUserDefinedCharactersCont
 169             A set of additionally allowed characters a name or
 170             identifier may continue with.
 171
 172         @code{.cpp}
 173             using namespace ::com::sun::star::i18n;
 174             // First character of an identifier may be any alphabetic or underscore.
 175             sal_Int32 nStartFlags = KParseTokens::ANY_ALPHA | KParseTokens::ASC_UNDERSCORE;
 176             // Continuing characters may be any alphanumeric or underscore or dot.
 177             sal_Int32 nContFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE | KParseTokens::ASC_DOT;
 178             // No further characters assumed to be contained in an identifier.
 179             OUString aEmptyString;
 180             // Parse any token.
 181             ParseResult rRes = xCC->parseAnyToken( aText, nPos, aLocale,
 182                 nStartFlags, aEmptyString, nContFlags, aEmptyString );
 183             // Get parsed token.
 184             if ( rRes.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER) )
 185                 fValue = rRes.Value;
 186             if ( rRes.TokenType & KParseType::IDENTNAME )
 187                 aName = aText.copy( nPos, rRes.EndPos - nPos );
 188             else if ( rRes.TokenType & KParseType::SINGLE_QUOTE_NAME )
 189                 aName = rRes.DequotedNameOrString;
 190             else if ( rRes.TokenType & KParseType::DOUBLE_QUOTE_STRING )
 191                 aString = rRes.DequotedNameOrString;
 192             else if ( rRes.TokenType & KParseType::BOOLEAN )
 193                 aSymbol = aText.copy( nPos, rRes.EndPos - nPos );
 194             else if ( rRes.TokenType & KParseType::ONE_SINGLE_CHAR )
 195                 aSymbol = aText.copy( nPos, rRes.EndPos - nPos );
 196         @endcode
 197      */
 198
 199     ParseResult parseAnyToken(
 200                             [in] string aText,
 201                             [in] long nPos,
 202                             [in] com::sun::star::lang::Locale aLocale,
 203                             [in] long nStartCharFlags,
 204                             [in] string aUserDefinedCharactersStart,
 205                             [in] long nContCharFlags,
 206                             [in] string aUserDefinedCharactersCont
 207                             );
 208
 209     /**
 210         Parse a string for a token of type <em>nTokenType</em> starting
 211         at position <em>nPos</em>.
 212
 213         <p> Other parameters are the same as in
 214         parseAnyToken(). If the actual token does not
 215         match the passed <em>nTokenType</em> a
 216         ParseResult::TokenType set to <b>0</b> (zero)
 217         is returned. </p>
 218
 219         @param  nTokenType
 220             One or more of the KParseType constants.
 221
 222         @param aText
 223             See #parseAnyToken
 224         @param nPos
 225             See #parseAnyToken
 226         @param aLocale
 227             See #parseAnyToken
 228         @param nStartCharFlags
 229             See #parseAnyToken
 230         @param aUserDefinedCharactersStart
 231             See #parseAnyToken
 232         @param nContCharFlags
 233             See #parseAnyToken
 234         @param aUserDefinedCharactersCont
 235             See #parseAnyToken
 236
 237         @code{.cpp}
 238             // Determine if a given name is a valid name (not quoted) and contains
 239             // only allowed characters.
 240             using namespace ::com::sun::star::i18n;
 241             // First character of an identifier may be any alphanumeric or underscore.
 242             sal_Int32 nStartFlags = KParseTokens::ANY_ALNUM | KParseTokens::ASC_UNDERSCORE;
 243             // No further characters assumed to be contained in an identifier start.
 244             OUString aEmptyString;
 245             // Continuing characters may be any alphanumeric or underscore.
 246             sal_Int32 nContFlags = nStartFlags;
 247             // Additionally, continuing characters may contain a blank.
 248             OUString aContChars( RTL_CONSTASCII_USTRINGPARAM(" ") );
 249             // Parse predefined (must be an IDENTNAME) token.
 250             ParseResult rRes = xCC->parsePredefinedToken( KParseType::IDENTNAME, rName, 0, aLocale,
 251                 nStartFlags, aEmptyString, nContFlags, aContChars );
 252             // Test that it is an identifier name and that nothing else
 253             // follows it, i.e. the parsed token ends at the end of the string.
 254             bValid = (rRes.TokenType & KParseType::IDENTNAME) && rRes.EndPos == rName.getLength();
 255         @endcode
 256      */
 257
 258     ParseResult parsePredefinedToken(
 259                             [in] long nTokenType,
 260                             [in] string aText,
 261                             [in] long nPos,
 262                             [in] com::sun::star::lang::Locale aLocale,
 263                             [in] long nStartCharFlags,
 264                             [in] string aUserDefinedCharactersStart,
 265                             [in] long nContCharFlags,
 266                             [in] string aUserDefinedCharactersCont
 267                             );
 268 };
 269
 270 }; }; }; };
 271
 272 #endif
 273
 274 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */