1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
21 #include <com/sun/star/i18n/XCharacterClassification.hpp>
22 #include <cppuhelper/implbase.hxx>
23 #include <com/sun/star/lang/XServiceInfo.hpp>
24 #include <rtl/ref.hxx>
26 #include <o3tl/typed_flags_set.hxx>
// Forward declarations: each namespace block below introduces a class name without
// defining it. The names are used further down as template arguments of
// css::uno::Reference<> / rtl::Reference<> members and parameters.
// com::sun::star::i18n is opened twice, once per forward-declared class.
29 namespace com::sun::star::uno
{ class XComponentContext
; }
30 namespace com::sun::star::i18n
{ class XNativeNumberSupplier
; }
31 namespace com::sun::star::i18n
{ class XLocaleData5
; }
32 namespace i18npool
{ class Transliteration_casemapping
; }
35 /// Flag values of table (the element type of the ParserFlags arrays declared in cclass_Unicode).
// Scoped enum with underlying type sal_uInt32. Every enumerator visible here is a
// single, distinct bit:
//   bits 1-5   : CHAR_BOOL, CHAR_WORD, CHAR_VALUE, CHAR_STRING, CHAR_DONTCARE
//   bit  8     : WORD_SEP
//   bits 10-14 : VALUE_SEP, VALUE_EXP, VALUE_SIGN, VALUE_EXP_VALUE, VALUE_DIGIT
//   bits 29-31 : NAME_SEP, STRING_SEP, EXCLUDED
// Bits 0, 6, 7 and 9 have no enumerator in this listing.
// NOTE(review): the meaning of each flag is defined by the parser implementation,
// which is not visible here -- do not infer semantics from the names alone.
36 enum class ParserFlags
: sal_uInt32
{
39 CHAR_BOOL
= 0x00000002,
40 CHAR_WORD
= 0x00000004,
41 CHAR_VALUE
= 0x00000008,
42 CHAR_STRING
= 0x00000010,
43 CHAR_DONTCARE
= 0x00000020,
46 WORD_SEP
= 0x00000100,
48 VALUE_SEP
= 0x00000400,
49 VALUE_EXP
= 0x00000800,
50 VALUE_SIGN
= 0x00001000,
51 VALUE_EXP_VALUE
= 0x00002000,
52 VALUE_DIGIT
= 0x00004000,
53 NAME_SEP
= 0x20000000,
54 STRING_SEP
= 0x40000000,
55 EXCLUDED
= 0x80000000,
// Explicit specialization of typed_flags for ParserFlags, deriving from
// is_typed_flags<ParserFlags, 0xe0007fff>. The mask 0xe0007fff has bits 0-14 and
// bits 29-31 set, which covers every ParserFlags enumerator value visible in this file.
// NOTE(review): typed_flags / is_typed_flags presumably come from
// <o3tl/typed_flags_set.hxx> (included above) and enable the bitwise operators for
// the enum -- confirm there. The enclosing namespace is not visible in this listing.
58 template<> struct typed_flags
<ParserFlags
> : is_typed_flags
<ParserFlags
, 0xe0007fff> {};
// Character-classification implementation class. It derives from
// cppu::WeakImplHelper instantiated with css::i18n::XCharacterClassification and
// css::lang::XServiceInfo, and is declared final. Only declarations appear here;
// per the comment further down, the parser-specific part is implemented in
// cclass_unicode_parser.cxx.
// NOTE(review): this listing shows neither the class braces nor any access
// specifiers, so the public/private split of the members cannot be confirmed here.
64 class cclass_Unicode final
: public cppu::WeakImplHelper
< css::i18n::XCharacterClassification
, css::lang::XServiceInfo
>
// Constructor; takes the component context reference by value.
// NOTE(review): presumably stored in m_xContext -- confirm in the implementation.
67 cclass_Unicode(css::uno::Reference
< css::uno::XComponentContext
> xContext
);
68 virtual ~cclass_Unicode() override
;
// --- Overridden interface methods taking text, position and locale arguments ---
// NOTE(review): these match the css::i18n::XCharacterClassification interface; the
// exact meaning of nPos / nCount (presumably start index and length within Text)
// should be confirmed against that interface's documentation.
//
// Case conversions: each returns a new OUString.
70 virtual OUString SAL_CALL
toUpper( const OUString
& Text
, sal_Int32 nPos
, sal_Int32 nCount
,
71 const css::lang::Locale
& rLocale
) override
;
72 virtual OUString SAL_CALL
toLower( const OUString
& Text
, sal_Int32 nPos
, sal_Int32 nCount
,
73 const css::lang::Locale
& rLocale
) override
;
74 virtual OUString SAL_CALL
toTitle( const OUString
& Text
, sal_Int32 nPos
, sal_Int32 nCount
,
75 const css::lang::Locale
& rLocale
) override
;
// Per-character queries: each takes a text and a position and returns a sal_Int16
// code. None of the three takes a locale argument.
76 virtual sal_Int16 SAL_CALL
getType( const OUString
& Text
, sal_Int32 nPos
) override
;
77 virtual sal_Int16 SAL_CALL
getCharacterDirection( const OUString
& Text
, sal_Int32 nPos
) override
;
78 virtual sal_Int16 SAL_CALL
getScript( const OUString
& Text
, sal_Int32 nPos
) override
;
// Type queries returning a sal_Int32: getCharacterType() takes a single position,
// getStringType() additionally takes nCount.
79 virtual sal_Int32 SAL_CALL
getCharacterType( const OUString
& text
, sal_Int32 nPos
,
80 const css::lang::Locale
& rLocale
) override
;
81 virtual sal_Int32 SAL_CALL
getStringType( const OUString
& text
, sal_Int32 nPos
, sal_Int32 nCount
,
82 const css::lang::Locale
& rLocale
) override
;
// Token parsing entry points; both return a css::i18n::ParseResult by value.
// Each takes separate flag words and user-defined character strings for the start
// character and for continuation characters of a token.
83 virtual css::i18n::ParseResult SAL_CALL
parseAnyToken( const OUString
& Text
, sal_Int32 nPos
,
84 const css::lang::Locale
& rLocale
, sal_Int32 nStartCharFlags
, const OUString
& userDefinedCharactersStart
,
85 sal_Int32 nContCharFlags
, const OUString
& userDefinedCharactersCont
) override
;
// Same parameters as parseAnyToken(), preceded by the requested nTokenType.
86 virtual css::i18n::ParseResult SAL_CALL
parsePredefinedToken( sal_Int32 nTokenType
, const OUString
& Text
,
87 sal_Int32 nPos
, const css::lang::Locale
& rLocale
, sal_Int32 nStartCharFlags
,
88 const OUString
& userDefinedCharactersStart
, sal_Int32 nContCharFlags
,
89 const OUString
& userDefinedCharactersCont
) override
;
// --- Overridden service-information methods ---
// NOTE(review): these three match the css::lang::XServiceInfo interface.
92 virtual OUString SAL_CALL
getImplementationName() override
;
93 virtual sal_Bool SAL_CALL
supportsService(const OUString
& ServiceName
) override
;
94 virtual css::uno::Sequence
< OUString
> SAL_CALL
getSupportedServiceNames() override
;
97 // These are performance sensitive, so we don't want to use locking and switch their state, so just
98 // have multiple copies.
// One reference-counted Transliteration_casemapping handle per case mapping
// (upper / lower / title), as named by the members below.
99 rtl::Reference
<Transliteration_casemapping
> transToUpper
;
100 rtl::Reference
<Transliteration_casemapping
> transToLower
;
101 rtl::Reference
<Transliteration_casemapping
> transToTitle
;
103 // --- parser specific (implemented in cclass_unicode_parser.cxx) ---
// Scanner states. The trailing comment on each enumerator lists the states that can
// follow it; ssGetChar is marked as the initial state.
// NOTE(review): the enum header is not visible in this listing; the type is referred
// to as ScanState by getFlags() / getFlagsExtended() below. ssStop is named as a
// transition target but its enumerator is not visible here either.
107 ssGetChar
, // initial state; -> ssBounce, ssGetValue, ssRewindFromValue, ssGetWord, ssGetWordFirstChar, ssGetString, ssGetBool, ssStop
108 ssGetValue
, // -> ssBounce, ssRewindFromValue, ssStopBack, ssGetWord
109 ssGetWord
, // -> ssBounce, ssStop, ssStopBack
110 ssGetWordFirstChar
, // -> ssBounce, ssGetWord, ssStop, ssStopBack
111 ssGetString
, // -> ssBounce, ssStop
112 ssGetBool
, // -> ssBounce, ssStop, ssStopBack
113 ssRewindFromValue
, // -> ssBounce, ssGetValue, ssGetWord, ssGetWordFirstChar, ssGetString, ssGetBool, ssStop, ssIgnoreLeadingInRewind
114 ssIgnoreLeadingInRewind
, // -> ssBounce, ssGetValue, ssRewindFromValue, ssGetWord, ssGetWordFirstChar, ssGetString, ssGetBool, ssStop
115 ssStopBack
, // -> ssStop
116 ssBounce
, // -> ssStopBack
// Static constant data, declared here without initializers or array bounds, so the
// definitions (and the array sizes) live out of line.
// NOTE(review): nDefCnt is presumably the element count of the default tables -- confirm.
120 static const sal_uInt8 nDefCnt
;
121 static const ParserFlags pDefaultParserTable
[];
122 static const sal_Int32 pParseTokensType
[];
124 /// If and where c occurs in pStr
// Static helper returning a const sal_Unicode pointer; c is passed as sal_uInt32.
// NOTE(review): presumably returns a null pointer when c is not found -- confirm.
125 static const sal_Unicode
* StrChr( const sal_Unicode
* pStr
, sal_uInt32 c
);
// Component context reference (same type as the constructor parameter).
128 css::uno::Reference
< css::uno::XComponentContext
> m_xContext
;
130 /// used for parser only
131 css::lang::Locale aParserLocale
;
// References to the locale data and native number supplier interfaces.
132 css::uno::Reference
< css::i18n::XLocaleData5
> mxLocaleData
;
133 css::uno::Reference
< css::i18n::XNativeNumberSupplier
> xNatNumSup
;
134 OUString aStartChars
;
// Owning ParserFlags arrays: std::unique_ptr<ParserFlags[]> releases each array
// automatically when the member is reset or destroyed.
// NOTE(review): by name, pTable is the main table and pStart / pCont relate to the
// user-defined start / continuation characters -- confirm in the parser source.
136 std::unique_ptr
<ParserFlags
[]> pTable
;
137 std::unique_ptr
<ParserFlags
[]> pStart
;
138 std::unique_ptr
<ParserFlags
[]> pCont
;
// NOTE(review): presumably the start / continuation token-type flags the tables were
// last set up with (compare the setupParserTable() parameters) -- confirm.
139 sal_Int32 nStartTypes
;
140 sal_Int32 nContTypes
;
// Separator characters, each a single sal_Unicode.
// NOTE(review): presumably taken from the locale data -- confirm.
141 sal_Unicode cGroupSep
;
142 sal_Unicode cDecimalSep
;
143 sal_Unicode cDecimalSepAlt
;
145 /// Get corresponding KParseTokens flag for a character
// Static; c is passed as sal_uInt32, and isFirst selects between two cases
// (presumably first character of a token vs. a following one -- confirm).
146 static sal_Int32
getParseTokensType(sal_uInt32 c
, bool isFirst
);
148 /// Access parser table flags.
// Non-const member, unlike getFlagsExtended() below.
149 ParserFlags
getFlags(sal_uInt32 c
, ScanState eState
);
151 /// Access parser flags via International and special definitions.
// Const member: callable on a const object.
152 ParserFlags
getFlagsExtended(sal_uInt32 c
, ScanState eState
) const;
154 /// Access parser table flags for user defined start characters.
155 ParserFlags
getStartCharsFlags( sal_uInt32 c
);
157 /// Access parser table flags for user defined continuation characters.
// NOTE(review): takes sal_Unicode, whereas getStartCharsFlags() takes sal_uInt32;
// confirm whether the narrower parameter type is intentional. Changing it requires
// changing the out-of-line definition as well.
158 ParserFlags
getContCharsFlags( sal_Unicode c
);
160 /// Setup parser table. Calls initParserTable() only if needed.
// Takes the same five arguments as initParserTable().
161 void setupParserTable( const css::lang::Locale
& rLocale
, sal_Int32 startCharTokenType
,
162 const OUString
& userDefinedCharactersStart
, sal_Int32 contCharTokenType
,
163 const OUString
& userDefinedCharactersCont
);
165 /// Init parser table.
166 void initParserTable( const css::lang::Locale
& rLocale
, sal_Int32 startCharTokenType
,
167 const OUString
& userDefinedCharactersStart
, sal_Int32 contCharTokenType
,
168 const OUString
& userDefinedCharactersCont
);
170 /// Destroy parser table.
171 void destroyParserTable();
// Parses rText starting at nPos and reports through the out-parameter r (a non-const
// ParseResult reference); the function itself returns void.
// The default for nTokenType is the unsigned literal 0xffffffff converted to
// sal_Int32, i.e. every bit set.
// NOTE(review): presumably meaning "any token type" -- confirm in the implementation.
174 void parseText( css::i18n::ParseResult
& r
, const OUString
& rText
, sal_Int32 nPos
,
175 sal_Int32 nTokenType
= 0xffffffff );
177 /// Setup International class, new'ed only if different from existing.
178 void setupInternational( const css::lang::Locale
& rLocale
);
180 /// Implementation of getCharacterType() for one single character
// Static; Text is a std::u16string_view, and nPos is passed by pointer rather than by
// value.
// NOTE(review): presumably the callee moves *nPos by `increment` -- confirm.
181 static sal_Int32
getCharType( std::u16string_view Text
, sal_Int32
*nPos
, sal_Int32 increment
);
187 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */