1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <breakit.hxx>
21 #include <swtypes.hxx>
23 #include <com/sun/star/i18n/ScriptType.hpp>
24 #include <com/sun/star/i18n/CharacterIteratorMode.hpp>
25 #include <com/sun/star/i18n/BreakIterator.hpp>
26 #include <svl/languageoptions.hxx>
27 #include <unicode/uchar.h>
28 #include <unotools/localedatawrapper.hxx>
32 using namespace com::sun::star
;
// File-level pointer to the SwBreakIt instance. It starts out null and is
// assigned a newly allocated SwBreakIt by SwBreakIt::Create_().
34 SwBreakIt
* g_pBreakIt
= nullptr;
// Allocates a new SwBreakIt from rxContext and stores it in the global
// g_pBreakIt.
//
// rxContext: component context forwarded to the SwBreakIt constructor.
//
// NOTE(review): only the assignment is visible in this view. Whether a
// previously stored instance is released first cannot be told from here --
// confirm before calling this more than once.
36 void SwBreakIt::Create_( const uno::Reference
<uno::XComponentContext
> & rxContext
)
39 g_pBreakIt
= new SwBreakIt( rxContext
);
// Takes no arguments and returns nothing; by name the counterpart of Create_().
// NOTE(review): the body is not visible in this view. Presumably it destroys
// the instance held in g_pBreakIt and clears the pointer -- confirm.
42 void SwBreakIt::Delete_()
// Accessor that returns a SwBreakIt pointer.
// NOTE(review): the body is not visible in this view. Presumably it returns
// g_pBreakIt, which is null until Create_() has run -- confirm.
48 SwBreakIt
* SwBreakIt::Get()
// Constructor.
//
// xContext: component context, taken by value and moved into m_xContext.
//
// Initialises, in the order written:
//  - m_xContext       from the moved parameter,
//  - m_xBreak         via i18n::BreakIterator::create(), using the *member*
//                     m_xContext rather than the moved-from parameter,
//  - m_aForbiddenLang to LANGUAGE_DONTKNOW.
//
// NOTE(review): the m_xBreak initialiser needs m_xContext to be initialised
// first. Actual order follows the member declaration order in the header,
// which is not visible here -- confirm.
53 SwBreakIt::SwBreakIt( uno::Reference
<uno::XComponentContext
> xContext
)
54 : m_xContext(std::move(xContext
))
55 , m_xBreak(i18n::BreakIterator::create(m_xContext
))
56 , m_aForbiddenLang(LANGUAGE_DONTKNOW
)
// Points the cached language tag (m_xLanguageTag) at the language aLang.
//
// aLang: language to store in the cached tag.
//
// Two update paths are visible: reusing the existing LanguageTag object, or
// allocating a new one.
// NOTE(review): the condition choosing between them is not visible in this
// view. Presumably it tests whether m_xLanguageTag already holds an object --
// confirm.
60 void SwBreakIt::GetLocale_( const LanguageType aLang
)
// Path 1: '->reset' is called on the pointed-to LanguageTag, re-targeting the
// existing object.
63 m_xLanguageTag
->reset(aLang
);
// Path 2: '.reset' is called on the pointer wrapper itself, installing a
// newly allocated LanguageTag.
65 m_xLanguageTag
.reset(new LanguageTag(aLang
));
// Overload that sets the cached language tag (m_xLanguageTag) from an
// existing LanguageTag.
//
// rLanguageTag: tag whose value is copied into the cache.
//
// Two update paths are visible: assigning into the existing LanguageTag
// object, or allocating a copy.
// NOTE(review): the condition choosing between them is not visible in this
// view. Presumably it tests whether m_xLanguageTag already holds an object --
// confirm.
68 void SwBreakIt::GetLocale_( const LanguageTag
& rLanguageTag
)
// Path 1: copy-assign into the LanguageTag that m_xLanguageTag points to.
71 *m_xLanguageTag
= rLanguageTag
;
// Path 2: install a newly allocated copy of rLanguageTag.
73 m_xLanguageTag
.reset(new LanguageTag(rLanguageTag
));
// Loads the forbidden-characters data for aLang and caches it.
//
// aLang: language whose forbidden characters are looked up.
//
// Side effects: overwrites m_aForbiddenLang with aLang and replaces the
// contents of m_oForbidden.
76 void SwBreakIt::GetForbidden_( const LanguageType aLang
)
// Locale data wrapper built from the stored component context and the
// language tag obtained for aLang.
78 LocaleDataWrapper
aWrap(m_xContext
, GetLanguageTag(aLang
));
// Record which language the cached data belongs to.
80 m_aForbiddenLang
= aLang
;
// Construct the cached value in place from the wrapper's forbidden characters.
81 m_oForbidden
.emplace(aWrap
.getForbiddenCharacters());
// Determines an i18n::ScriptType value for position nPos in rText. While the
// result is still WEAK it tries a series of fallbacks that consult the
// neighbouring text.
//
// rText: text to inspect.
// nPos:  position in rText; taken by value and used as a local working copy.
//
// Returns sal_uInt16 holding the script type.
// NOTE(review): the return statement is not visible in this view. Presumably
// nScript is returned -- confirm.
//
// Fallback order, as far as visible (each step runs only while nScript is
// still WEAK):
//  1. script type at nPos;
//  2. if the next character is a combining-type mark, script type at nPos + 1;
//  3. script type just before the position reported by beginOfScript();
//  4. script type at the position reported by endOfScript();
//  5. script type of the application language.
//
// NOTE(review): several guard and adjustment lines are missing from this view.
// Do not rely on these comments for the exact bounds handling.
84 sal_uInt16
SwBreakIt::GetRealScriptOfText( const OUString
& rText
, sal_Int32 nPos
) const
// Start from WEAK so every fallback below stays eligible.
86 sal_uInt16 nScript
= i18n::ScriptType::WEAK
;
// Special case: nPos is non-zero and sits exactly at the end of the text.
// NOTE(review): the action taken is not visible here. Presumably nPos is moved
// back onto the last character -- confirm.
89 if( nPos
&& nPos
== rText
.getLength() )
// Step 1: ask the break iterator for the script type at nPos.
94 nScript
= m_xBreak
->getScriptType(rText
, nPos
);
95 sal_Int32 nChgPos
= 0;
// Step 2: only when still WEAK and a following character exists
// (nPos + 1 is inside the text).
96 if (i18n::ScriptType::WEAK
== nScript
&& nPos
>= 0 && nPos
+ 1 < rText
.getLength())
98 // A weak character followed by a mark may be meant to combine with
99 // the mark, so prefer the following character's script
// Classify the following character via u_charType(). The three mark
// categories below share one handler.
100 switch (u_charType(rText
[nPos
+ 1]))
102 case U_NON_SPACING_MARK
:
103 case U_ENCLOSING_MARK
:
104 case U_COMBINING_SPACING_MARK
:
105 nScript
= m_xBreak
->getScriptType(rText
, nPos
+1);
// Step 3: still WEAK and not at position 0 -- look backwards.
109 if( i18n::ScriptType::WEAK
== nScript
&& nPos
)
111 nChgPos
= m_xBreak
->beginOfScript(rText
, nPos
, nScript
);
// Take the script type of the character just before nChgPos.
// NOTE(review): a range check on nChgPos before using nChgPos - 1 is not
// visible in this view -- confirm one exists.
113 nScript
= m_xBreak
->getScriptType(rText
, nChgPos
-1);
// Step 4: still WEAK -- look forwards.
116 if( i18n::ScriptType::WEAK
== nScript
)
118 nChgPos
= m_xBreak
->endOfScript(rText
, nPos
, nScript
);
// Only query the script type when nChgPos is a valid index into rText.
119 if( rText
.getLength() > nChgPos
&& 0 <= nChgPos
)
120 nScript
= m_xBreak
->getScriptType(rText
, nChgPos
);
// Step 5: last resort -- use the script type of the application language.
123 if( i18n::ScriptType::WEAK
== nScript
)
124 nScript
= SvtLanguageOptions::GetI18NScriptTypeOfLanguage( GetAppLanguage() );
// Collects, as an SvtScriptType bit set, the script classes (LATIN, ASIAN,
// COMPLEX) found in rText.
//
// rText: text to scan; an empty string skips the scan entirely.
//
// Returns SvtScriptType.
// NOTE(review): the return statement is not visible in this view. Presumably
// nRet is returned, which stays SvtScriptType::NONE when nothing is scanned --
// confirm.
128 SvtScriptType
SwBreakIt::GetAllScriptsOfText( const OUString
& rText
) const
// Mask with all three script bits set; used both as the value for WEAK text
// and as the "nothing more to find" test.
130 const SvtScriptType coAllScripts
= SvtScriptType::LATIN
|
131 SvtScriptType::ASIAN
|
132 SvtScriptType::COMPLEX
;
133 SvtScriptType nRet
= SvtScriptType::NONE
;
134 sal_uInt16 nScript
= 0;
135 if (!rText
.isEmpty())
// Walk the text: n starts at 0 and, after each pass, jumps to the position
// returned by endOfScript() for the script type just examined.
137 for( sal_Int32 n
= 0, nEnd
= rText
.getLength(); n
< nEnd
;
138 n
= m_xBreak
->endOfScript(rText
, n
, nScript
) )
140 nScript
= m_xBreak
->getScriptType(rText
, n
);
// Map the i18n script type onto the matching SvtScriptType bit.
// NOTE(review): the switch header is not visible in this view. Presumably it
// switches on nScript -- confirm.
143 case i18n::ScriptType::LATIN
: nRet
|= SvtScriptType::LATIN
; break;
144 case i18n::ScriptType::ASIAN
: nRet
|= SvtScriptType::ASIAN
; break;
145 case i18n::ScriptType::COMPLEX
: nRet
|= SvtScriptType::COMPLEX
; break;
146 case i18n::ScriptType::WEAK
:
// A WEAK result when nothing has been recorded yet sets all three script bits.
147 if( nRet
== SvtScriptType::NONE
)
148 nRet
|= coAllScripts
;
// Every script bit is already set.
// NOTE(review): the action is not visible here. Presumably the loop is left
// early -- confirm.
151 if( coAllScripts
== nRet
)
// Counts graphemes in rText, walking from nStart (clamped to at least 0)
// while the current position is below nEnd.
//
// rText:  text to measure.
// nStart: first position to consider; negative values are treated as 0.
// nEnd:   the walk stops once the current position reaches this value.
//
// Returns nGraphemeCount.
// NOTE(review): the statement that increments nGraphemeCount is not visible
// in this view. Presumably it is bumped once per loop pass -- confirm.
// NOTE(review): no visible check that nEnd <= rText.getLength(); callers
// presumably guarantee it -- confirm.
158 sal_Int32
SwBreakIt::getGraphemeCount(const OUString
& rText
,
159 sal_Int32 nStart
, sal_Int32 nEnd
) const
161 sal_Int32 nGraphemeCount
= 0;
// Clamp the starting position so a negative nStart cannot index before the
// beginning of the string.
163 sal_Int32 nCurPos
= std::max(static_cast<sal_Int32
>(0), nStart
);
164 while (nCurPos
< nEnd
)
166 // fdo#49208 cheat and assume that nothing can combine with a space
167 // to form a single grapheme
// NOTE(review): the body of this space shortcut is not visible here.
// Presumably it advances nCurPos by one without calling the break iterator --
// confirm.
168 if (rText
[nCurPos
] == ' ')
// Otherwise ask the break iterator to advance by one cell (SKIPCELL mode)
// using a default-constructed Locale. nCount2 is passed both as the requested
// count and as the variable receiving the count done.
174 sal_Int32 nCount2
= 1;
175 nCurPos
= m_xBreak
->nextCharacters(rText
, nCurPos
, lang::Locale(),
176 i18n::CharacterIteratorMode::SKIPCELL
, nCount2
, nCount2
);
181 return nGraphemeCount
;
184 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */