tdf#130857 qt weld: Implement QtInstanceWidget::strip_mnemonic
[LibreOffice.git] / lingucomponent / source / hyphenator / hyphen / hyphenimp.cxx
blob1e527c8e8adafdd2d9ad301ebe191d1dc7c1c2c6
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <com/sun/star/uno/Reference.h>
22 #include <comphelper/sequence.hxx>
23 #include <comphelper/processfactory.hxx>
24 #include <cppuhelper/factory.hxx>
25 #include <cppuhelper/supportsservice.hxx>
26 #include <cppuhelper/weak.hxx>
27 #include <com/sun/star/linguistic2/XLinguProperties.hpp>
28 #include <com/sun/star/linguistic2/LinguServiceManager.hpp>
29 #include <com/sun/star/linguistic2/XSpellChecker1.hpp>
30 #include <i18nlangtag/languagetag.hxx>
31 #include <tools/debug.hxx>
32 #include <osl/mutex.hxx>
33 #include <osl/thread.h>
35 #include <hyphen.h>
36 #include "hyphenimp.hxx"
38 #include <linguistic/hyphdta.hxx>
39 #include <rtl/ustring.hxx>
40 #include <rtl/ustrbuf.hxx>
41 #include <rtl/textenc.h>
42 #include <sal/log.hxx>
44 #include <linguistic/misc.hxx>
45 #include <svtools/strings.hrc>
46 #include <unotools/charclass.hxx>
47 #include <unotools/lingucfg.hxx>
48 #include <unotools/resmgr.hxx>
49 #include <osl/file.hxx>
51 #include <stdio.h>
52 #include <string.h>
54 #include <cassert>
55 #include <numeric>
56 #include <vector>
57 #include <set>
58 #include <memory>
59 #include <o3tl/string_view.hxx>
// Passed to XSpellChecker1::isValid() by Hyphenator::hyphenate() before it
// asks the spell checker for a morphological analysis of the word.
61 // XML-header to query SPELLML support
62 constexpr OUStringLiteral SPELLML_SUPPORT = u"<?xml?>";
64 using namespace osl;
65 using namespace com::sun::star;
66 using namespace com::sun::star::beans;
67 using namespace com::sun::star::lang;
68 using namespace com::sun::star::uno;
69 using namespace com::sun::star::linguistic2;
70 using namespace linguistic;
72 static uno::Reference< XLinguServiceManager2 > GetLngSvcMgr_Impl()
74 const uno::Reference< XComponentContext >& xContext( comphelper::getProcessComponentContext() );
75 uno::Reference< XLinguServiceManager2 > xRes = LinguServiceManager::create( xContext ) ;
76 return xRes;
79 Hyphenator::Hyphenator() :
80 aEvtListeners ( GetLinguMutex() )
82 bDisposing = false;
85 Hyphenator::~Hyphenator()
87 for (auto & rInfo : mvDicts)
89 if (rInfo.aPtr)
90 hnj_hyphen_free(rInfo.aPtr);
93 if (pPropHelper)
95 pPropHelper->RemoveAsPropListener();
99 PropertyHelper_Hyphenation& Hyphenator::GetPropHelper_Impl()
101 if (!pPropHelper)
103 Reference< XLinguProperties > xPropSet = GetLinguProperties();
105 pPropHelper.reset( new PropertyHelper_Hyphenation (static_cast<XHyphenator *>(this), xPropSet ) );
106 pPropHelper->AddAsPropListener(); //! after a reference is established
108 return *pPropHelper;
111 // Requires GetLinguMutex locked
112 void Hyphenator::ensureLocales()
114 // this routine should return the locales supported by the installed
115 // dictionaries.
116 if (mvDicts.empty())
118 SvtLinguConfig aLinguCfg;
120 // get list of dictionaries-to-use
121 // (or better speaking: the list of dictionaries using the
122 // new configuration entries).
123 std::vector< SvtLinguConfigDictionaryEntry > aDics;
124 uno::Sequence< OUString > aFormatList;
125 aLinguCfg.GetSupportedDictionaryFormatsFor( u"Hyphenators"_ustr,
126 u"org.openoffice.lingu.LibHnjHyphenator"_ustr, aFormatList );
127 for (const auto& rFormat : aFormatList)
129 std::vector< SvtLinguConfigDictionaryEntry > aTmpDic(
130 aLinguCfg.GetActiveDictionariesByFormat( rFormat ) );
131 aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() );
134 //!! for compatibility with old dictionaries (the ones not using extensions
135 //!! or new configuration entries, but still using the dictionary.lst file)
136 //!! Get the list of old style spell checking dictionaries to use...
137 std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics(
138 GetOldStyleDics( "HYPH" ) );
140 // to prefer dictionaries with configuration entries we will only
141 // use those old style dictionaries that add a language that
142 // is not yet supported by the list of new style dictionaries
143 MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics );
145 if (!aDics.empty())
147 // get supported locales from the dictionaries-to-use...
148 std::set<OUString> aLocaleNamesSet;
149 for (auto const& dict : aDics)
151 for (const auto& rLocaleName : dict.aLocaleNames)
153 aLocaleNamesSet.insert( rLocaleName );
156 // ... and add them to the resulting sequence
157 std::vector<Locale> aLocalesVec;
158 aLocalesVec.reserve(aLocaleNamesSet.size());
160 std::transform(aLocaleNamesSet.begin(), aLocaleNamesSet.end(), std::back_inserter(aLocalesVec),
161 [](const OUString& localeName) { return LanguageTag::convertToLocale(localeName); });
163 aSuppLocales = comphelper::containerToSequence(aLocalesVec);
165 //! For each dictionary and each locale we need a separate entry.
166 //! If this results in more than one dictionary per locale than (for now)
167 //! it is undefined which dictionary gets used.
168 //! In the future the implementation should support using several dictionaries
169 //! for one locale.
170 sal_Int32 numdict = std::accumulate(aDics.begin(), aDics.end(), 0,
171 [](const sal_Int32 nSum, const SvtLinguConfigDictionaryEntry& dict) {
172 return nSum + dict.aLocaleNames.getLength(); });
174 // add dictionary information
175 mvDicts.resize(numdict);
177 sal_Int32 k = 0;
178 for (auto const& dict : aDics)
180 if (dict.aLocaleNames.hasElements() &&
181 dict.aLocations.hasElements())
183 // currently only one language per dictionary is supported in the actual implementation...
184 // Thus here we work-around this by adding the same dictionary several times.
185 // Once for each of its supported locales.
186 for (const auto& rLocaleName : dict.aLocaleNames)
188 LanguageTag aLanguageTag(rLocaleName);
189 mvDicts[k].aPtr = nullptr;
190 mvDicts[k].eEnc = RTL_TEXTENCODING_DONTKNOW;
191 mvDicts[k].aLoc = aLanguageTag.getLocale();
192 mvDicts[k].apCC.reset( new CharClass( std::move(aLanguageTag) ) );
193 // also both files have to be in the same directory and the
194 // file names must only differ in the extension (.aff/.dic).
195 // Thus we use the first location only and strip the extension part.
196 OUString aLocation = dict.aLocations[0];
197 sal_Int32 nPos = aLocation.lastIndexOf( '.' );
198 aLocation = aLocation.copy( 0, nPos );
199 mvDicts[k].aName = aLocation;
201 ++k;
205 DBG_ASSERT( k == numdict, "index mismatch?" );
207 else
209 // no dictionary found so register no dictionaries
210 mvDicts.clear();
211 aSuppLocales.realloc(0);
216 Sequence< Locale > SAL_CALL Hyphenator::getLocales()
218 MutexGuard aGuard(GetLinguMutex());
219 ensureLocales();
220 return aSuppLocales;
223 sal_Bool SAL_CALL Hyphenator::hasLocale(const Locale& rLocale)
225 MutexGuard aGuard( GetLinguMutex() );
226 ensureLocales();
227 return comphelper::findValue(aSuppLocales, rLocale) != -1;
230 namespace {
231 bool LoadDictionary(HDInfo& rDict)
233 OUString DictFN = rDict.aName + ".dic";
234 OUString dictpath;
236 osl::FileBase::getSystemPathFromFileURL(DictFN, dictpath);
238 #if defined(_WIN32)
239 // hnj_hyphen_load expects UTF-8 encoded paths with \\?\ long path prefix.
240 OString sTmp = Win_AddLongPathPrefix(OUStringToOString(dictpath, RTL_TEXTENCODING_UTF8));
241 #else
242 OString sTmp(OU2ENC(dictpath, osl_getThreadTextEncoding()));
243 #endif
244 HyphenDict* dict = hnj_hyphen_load(sTmp.getStr());
245 if (!dict)
247 SAL_WARN(
248 "lingucomponent",
249 "Couldn't find file " << dictpath);
250 return false;
252 rDict.aPtr = dict;
253 rDict.eEnc = getTextEncodingFromCharset(dict->cset);
254 return true;
257 OUString makeLowerCase(const OUString& aTerm, CharClass const* pCC)
259 if (pCC)
260 return pCC->lowercase(aTerm);
261 return aTerm;
264 OUString makeUpperCase(const OUString& aTerm, CharClass const* pCC)
266 if (pCC)
267 return pCC->uppercase(aTerm);
268 return aTerm;
271 OUString makeInitCap(const OUString& aTerm, CharClass const* pCC)
273 sal_Int32 tlen = aTerm.getLength();
274 if (pCC && tlen)
276 OUString bTemp = aTerm.copy(0, 1);
277 if (tlen > 1)
278 return (pCC->uppercase(bTemp, 0, 1) + pCC->lowercase(aTerm, 1, (tlen - 1)));
280 return pCC->uppercase(bTemp, 0, 1);
282 return aTerm;
/// Result of one hyphenation run. The destructor releases rep (including its
/// n elements), pos and cut with free(); hyphens is released by its
/// unique_ptr. A default-constructed object represents a failed run.
struct hyphenation_result
{
    int n = 0;            // number of elements of rep that the destructor frees
    bool failed = true;
    char** rep = nullptr; // replacements of discretionary hyphenation
    int* pos = nullptr;   // array of [hyphenation point] minus [deletion position]
    int* cut = nullptr;   // length of deletions in original word
    std::unique_ptr<char[]> hyphens;

    ~hyphenation_result()
    {
        if (rep)
        {
            // free(nullptr) is a no-op, so unset elements need no extra check
            for (int idx = 0; idx < n; ++idx)
                free(rep[idx]);
            free(rep);
        }
        free(pos);
        free(cut);
    }
};
312 hyphenation_result getHyphens(std::u16string_view word, const HDInfo& hdInfo, sal_Int16 minLead,
313 sal_Int16 minTrail)
315 // first convert any smart quotes or apostrophes to normal ones
316 OUStringBuffer aBuf(word);
317 for (sal_Int32 ix = 0; ix < aBuf.getLength(); ix++)
319 sal_Unicode ch = aBuf[ix];
320 if ((ch == 0x201C) || (ch == 0x201D))
321 aBuf[ix] = u'"';
322 if ((ch == 0x2018) || (ch == 0x2019))
323 aBuf[ix] = u'\'';
326 // now convert word to all lowercase for pattern recognition
327 OUString nTerm(makeLowerCase(OUString::unacquired(aBuf), hdInfo.apCC.get()));
329 // now convert word to needed encoding
330 OString encWord(OU2ENC(nTerm, hdInfo.eEnc));
332 // now strip off any ending periods
333 auto lastValidPos = std::string_view(encWord).find_last_not_of('.');
334 if (lastValidPos == std::string_view::npos)
335 return {};
337 int n = lastValidPos + 1;
338 std::unique_ptr<char[]> hyphens(new char[n + 5]);
339 char** rep = nullptr; // replacements of discretionary hyphenation
340 int* pos = nullptr; // array of [hyphenation point] minus [deletion position]
341 int* cut = nullptr; // length of deletions in original word
343 HyphenDict* dict = hdInfo.aPtr;
344 const bool failed = 0 != hnj_hyphen_hyphenate3( dict, encWord.getStr(), n, hyphens.get(), nullptr,
345 &rep, &pos, &cut, minLead, minTrail,
346 std::max<sal_Int16>(dict->clhmin, 2) + std::max(0, minLead - std::max<sal_Int16>(dict->lhmin, 2)),
347 std::max<sal_Int16>(dict->crhmin, 2) + std::max(0, minTrail - std::max<sal_Int16>(dict->rhmin, 2)) );
348 return { n, failed, rep, pos, cut, std::move(hyphens) }; // buffers will free in dtor
352 const HDInfo* Hyphenator::getMatchingDict(const css::lang::Locale& aLocale)
354 MutexGuard aGuard(GetLinguMutex());
355 ensureLocales();
356 auto it = std::find_if(mvDicts.rbegin(), mvDicts.rend(),
357 [&aLocale](auto& el) { return el.aLoc == aLocale; });
358 if (it == mvDicts.rend())
359 return nullptr;
361 // if this dictionary has not been loaded yet do that
362 if (!it->aPtr)
364 if (!LoadDictionary(*it))
365 return nullptr;
368 // we don't want to work with a default text encoding since following incorrect
369 // results may occur only for specific text and thus may be hard to notice.
370 // Thus better always make a clean exit here if the text encoding is in question.
371 // Hopefully something not working at all will raise proper attention quickly. ;-)
372 DBG_ASSERT(it->eEnc != RTL_TEXTENCODING_DONTKNOW,
373 "failed to get text encoding! (maybe incorrect encoding string in file)");
374 if (it->eEnc == RTL_TEXTENCODING_DONTKNOW)
375 return nullptr;
377 return &*it;
// Looks for a hyphenation point in aWord for aLocale and returns it as an
// XHyphenatedWord.
//
// aProperties are installed as temporary property values on the property
// helper; the minimum leading/trailing/word lengths, the compound minimum and
// the "no hyphenation of all-caps words" switch are read from it afterwards.
// nMaxLeading is turned into an (exclusive) upper bound for the break position
// via GetPosInWordToCheck().
//
// Returns nullptr when no dictionary matches aLocale, when all-caps words are
// excluded and aWord equals its upper-cased form, or when getHyphens() reports
// a failure. Returns an empty reference when no position passes the checks.
// When the chosen position carries replacement data (discretionary
// hyphenation), the result is created with the modified spelling of the word.
380 Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const OUString& aWord,
381 const css::lang::Locale& aLocale,
382 sal_Int16 nMaxLeading,
383 const css::uno::Sequence< css::beans::PropertyValue >& aProperties )
385 PropertyHelper_Hyphenation& rHelper = GetPropHelper();
386 rHelper.SetTmpPropVals(aProperties);
387 sal_Int16 minTrail = rHelper.GetMinTrailing();
388 sal_Int16 minLead = rHelper.GetMinLeading();
389 sal_Int16 minCompoundLead = rHelper.GetCompoundMinLeading();
390 sal_Int16 minLen = rHelper.GetMinWordLength();
391 bool bNoHyphenateCaps = rHelper.IsNoHyphenateCaps();
393 // if we have a hyphenation dictionary matching this locale
394 if (auto pHDInfo = getMatchingDict(aLocale))
396 int nHyphenationPos = -1;
397 int nHyphenationPosAlt = -1;
398 int nHyphenationPosAltHyph = -1;
400 // hyphenate the word with that dictionary
401 rtl_TextEncoding eEnc = pHDInfo->eEnc;
402 CharClass* pCC = pHDInfo->apCC.get();
404 // Don't hyphenate uppercase words if requested
405 if (bNoHyphenateCaps && aWord == makeUpperCase(aWord, pCC))
407 return nullptr;
410 CapType ct = capitalType(aWord, pCC);
412 auto result = getHyphens(aWord, *pHDInfo, minLead, minTrail);
413 if (result.failed)
414 return nullptr;
416 sal_Int32 Leading = GetPosInWordToCheck( aWord, nMaxLeading );
418 // use morphological analysis of Hunspell to get better hyphenation of compound words
419 // optionally when hyphenation zone is enabled
420 // pa: fields contain stems resulted by compound word analysis of non-dictionary words
421 // hy: fields contain hyphenation data of dictionary (compound) words
422 Reference< XSpellAlternatives > xTmpRes;
423 bool bAnalyzed = false; // enough the analyse once the word
424 bool bCompoundHyphenation = true; // try to hyphenate compound words better
425 OUString sStems; // processed result of the compound word analysis, e.g. com|pound|word
426 sal_Int32 nSuffixLen = 0; // do not remove break points in suffixes
// Scan all positions of the hyphenation result. nHyphenationPos is
// overwritten on every accepted position, so the last accepted one is used.
428 for (sal_Int32 i = 0; i < result.n; i++)
430 int leftrep = 0;
431 bool hit = (result.n >= minLen);
432 if (!result.rep || !result.rep[i])
434 hit = hit && (result.hyphens[i] & 1) && (i < Leading);
435 hit = hit && (i >= (minLead-1) );
436 hit = hit && ((result.n - i - 1) >= minTrail);
438 else
440 // calculate change character length before hyphenation point signed with '='
441 for (char * c = result.rep[i]; *c && (*c != '='); c++)
443 if (eEnc == RTL_TEXTENCODING_UTF8)
// in UTF-8 only bytes that are not continuation bytes (10xxxxxx) start a character
445 if (static_cast<unsigned char>(*c) >> 6 != 2)
446 leftrep++;
448 else
449 leftrep++;
451 hit = hit && (result.hyphens[i] & 1) && ((i + leftrep - result.pos[i]) < Leading);
452 hit = hit && ((i + leftrep - result.pos[i]) >= (minLead-1) );
453 hit = hit && ((result.n - i - 1 + sal::static_int_cast< sal_sSize >(strlen(result.rep[i])) - leftrep - 1) >= minTrail);
455 if (hit)
457 // skip hyphenation right after stem boundaries in compound words
458 // if minCompoundLead > 2 (default value: less than n=minCompoundLead character distance)
459 if ( bCompoundHyphenation && minCompoundLead > 2 && nHyphenationPos > -1 && i - nHyphenationPos < minCompoundLead )
461 uno::Reference< XLinguServiceManager2 > xLngSvcMgr( GetLngSvcMgr_Impl() );
462 uno::Reference< XSpellChecker1 > xSpell;
464 LanguageType nLanguage = LinguLocaleToLanguage( aLocale );
466 xSpell.set( xLngSvcMgr->getSpellChecker(), UNO_QUERY );
468 // get morphological analysis of the word
469 if ( ( bAnalyzed && xTmpRes.is() ) || ( xSpell.is() && xSpell->isValid(
470 SPELLML_SUPPORT, static_cast<sal_uInt16>(nLanguage),
471 uno::Sequence< beans::PropertyValue >() ) ) )
473 if ( !bAnalyzed )
475 xTmpRes = xSpell->spell( "<?xml?><query type='analyze'><word>" +
476 aWord + "</word></query>",
477 static_cast<sal_uInt16>(nLanguage),
478 uno::Sequence< beans::PropertyValue >() );
479 bAnalyzed = true;
481 if (xTmpRes.is())
483 Sequence<OUString>seq = xTmpRes->getAlternatives();
484 if (seq.hasElements())
// NOTE(review): indexOf() returns -1 when "</a>" is missing and that value
// is passed to copy() below unchecked - confirm the analysis always contains it.
486 sal_Int32 nEndOfFirstAnalysis = seq[0].indexOf("</a>");
487 // FIXME use only the first analysis
488 OUString morph(
489 seq[0].copy(0, nEndOfFirstAnalysis));
491 // concatenate pa: fields, i.e. stems in the analysis:
492 // pa:stem1 pa:stem2 pa:stem3 -> stem1||stem2||stem3
493 sal_Int32 nPa = -1;
494 while ( (nPa = morph.indexOf(u" pa:", nPa + 1)) > -1 )
496 // use hy: field of the actual stem, if it exists
497 // pa:stem1 hy:st|em1 pa:stem2 -> st|em1||stem2
498 sal_Int32 nHy = morph.indexOf(u" hy:", nPa + 3);
499 sal_Int32 nPa2 = morph.indexOf(u" pa:", nPa + 3);
501 if ( nHy > -1 && ( nPa2 == -1 || nHy < nPa2 ) )
503 OUString sStems2(morph.getToken(1, ' ', nHy).copy(3));
504 if ( sStems2.indexOf('|') > -1 )
505 sStems += sStems2+ u"||";
506 else if ( sal_Int32 nBreak = o3tl::toInt32(sStems2) )
508 OUString sPa(morph.getToken(1, ' ', nPa).copy(3));
509 if ( nBreak < sPa.getLength() )
510 sStems += OUString::Concat(sPa.subView(0, nBreak)) + u"|" +
511 sPa.subView(nBreak);
514 else
516 OUString sPa(morph.getToken(1, ' ', nPa).copy(3));
518 // handle special case: missing pa: in morphological analysis
519 // before in-word suffixes (German, Sweden etc. dictionaries)
520 // (recognized by the single last pa:)
521 if (sStems.isEmpty() && nPa2 == -1 && aWord.endsWith(sPa))
523 sStems = OUString::Concat(aWord.subView(0, aWord.getLength() -
524 sPa.getLength())) + u"||" +
525 aWord.subView(aWord.getLength() -
526 sPa.getLength());
527 break;
530 sStems += sPa + "||";
532 // count suffix length
533 sal_Int32 nSt = morph.lastIndexOf(" st:");
534 if ( nSt > -1 )
536 sal_Int32 nStemLen =
537 o3tl::getToken(morph, 1, ' ', nSt).length() - 3;
538 if ( nStemLen < sPa.getLength() )
539 nSuffixLen = sPa.getLength() - nStemLen;
543 if ( nPa == -1 ) // getToken() can modify nPa
544 break;
547 // only hy:, but not pa:
548 if ( sStems.isEmpty() )
550 // check hy: (pre-defined hyphenation)
551 sal_Int32 nHy = morph.indexOf(" hy:");
552 if (nHy > -1)
554 sStems = morph.getToken(1, ' ', nHy).copy(3);
555 if ( sStems.indexOf('|') == -1 && sStems.indexOf('-') == -1 )
557 if ( sal_Int32 nBreak = o3tl::toInt32(sStems) )
559 if ( nBreak < aWord.getLength() )
560 sStems += OUString::Concat(aWord.subView(0, nBreak)) + u"|" +
561 aWord.subView(nBreak);
570 // handle string separated by |, e.g "program hy:pro|gram"
571 if ( sStems.indexOf('|') > -1 )
573 sal_Int32 nLetters = 0; // count not separator characters
574 sal_Int32 nSepPos = -1; // position of last character | used for stem boundaries
575 bool bWeightedSep = false; // double separator || = weighted stem boundary
576 sal_Int32 j = 0;
577 for (; j < sStems.getLength() && nLetters <= i; j++)
579 if ( sStems[j] == '|' )
581 bWeightedSep = nSepPos > -1 && (j - 1 == nSepPos);
582 nSepPos = j;
584 else if ( sStems[j] != '-' && sStems[j] != '=' && sStems[j] != '*' )
585 ++nLetters;
587 // skip break points near stem boundaries
588 if (
589 // there is a stem boundary before the actual break point
590 nSepPos > -1 &&
591 // and the break point is within a stem, i.e. not in the
592 // suffix of the last stem
593 i < aWord.getLength() - nSuffixLen - 1 &&
594 // and it is not another stem boundary
595 j + 1 < sStems.getLength() &&
596 ( sStems[j + 1] != u'|' ||
597 // except if it's only the previous was a weighted one
598 ( bWeightedSep && ( j + 2 == sStems.getLength() ||
599 sStems[j + 2] != u'|' ) ) ) )
601 continue;
604 else
605 // not a compound word
606 bCompoundHyphenation = false;
608 else
609 // no SPELLML support, no morphological analysis
610 bCompoundHyphenation = false;
613 nHyphenationPos = i;
614 if (result.rep && result.rep[i])
616 nHyphenationPosAlt = i - result.pos[i];
617 nHyphenationPosAltHyph = i + leftrep - result.pos[i];
// Build the return value from the position selected by the scan, if any.
622 Reference<XHyphenatedWord> xRes;
623 if (nHyphenationPos != -1)
625 if (result.rep && result.rep[nHyphenationPos])
627 // remove equal sign
628 char * s = result.rep[nHyphenationPos];
629 int eq = 0;
630 for (; *s; s++)
632 if (*s == '=') eq = 1;
633 if (eq) *s = *(s + 1);
635 OUString repHyphlow(result.rep[nHyphenationPos], strlen(result.rep[nHyphenationPos]), eEnc);
636 OUString repHyph;
637 switch (ct)
639 case CapType::ALLCAP:
641 repHyph = makeUpperCase(repHyphlow, pCC);
642 break;
644 case CapType::INITCAP:
646 if (nHyphenationPosAlt == -1)
647 repHyph = makeInitCap(repHyphlow, pCC);
648 else
649 repHyph = repHyphlow;
650 break;
652 default:
654 repHyph = repHyphlow;
655 break;
659 // handle shortening
660 sal_Int16 nPos = static_cast<sal_Int16>((nHyphenationPosAltHyph < nHyphenationPos) ?
661 nHyphenationPosAltHyph : nHyphenationPos);
662 // discretionary hyphenation
663 xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LinguLocaleToLanguage( aLocale ), nPos,
664 aWord.replaceAt(nHyphenationPosAlt + 1, result.cut[nHyphenationPos], repHyph),
665 static_cast<sal_Int16>(nHyphenationPosAltHyph));
667 else
669 xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LinguLocaleToLanguage( aLocale ),
670 static_cast<sal_Int16>(nHyphenationPos), aWord, static_cast<sal_Int16>(nHyphenationPos));
673 return xRes;
675 return nullptr;
678 Reference < XHyphenatedWord > SAL_CALL Hyphenator::queryAlternativeSpelling(
679 const OUString& aWord,
680 const css::lang::Locale& aLocale,
681 sal_Int16 nIndex,
682 const css::uno::Sequence< css::beans::PropertyValue >& aProperties )
684 // Firstly we allow only one plus character before the hyphen to avoid to miss the right break point:
685 for (int extrachar = 1; extrachar <= 2; extrachar++)
687 Reference< XHyphenatedWord > xRes = hyphenate(aWord, aLocale, nIndex + 1 + extrachar, aProperties);
688 if (xRes.is() && xRes->isAlternativeSpelling() && xRes->getHyphenationPos() == nIndex)
689 return xRes;
691 return nullptr;
694 Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const OUString& aWord,
695 const css::lang::Locale& aLocale,
696 const css::uno::Sequence< css::beans::PropertyValue >& aProperties )
698 PropertyHelper_Hyphenation& rHelper = GetPropHelper();
699 rHelper.SetTmpPropVals(aProperties);
700 sal_Int16 minTrail = rHelper.GetMinTrailing();
701 sal_Int16 minLead = rHelper.GetMinLeading();
702 sal_Int16 minLen = rHelper.GetMinWordLength();
704 // Resolves: fdo#41083 honour MinWordLength in "createPossibleHyphens" as
705 // well as "hyphenate"
706 if (aWord.getLength() < minLen)
707 return nullptr;
709 // if we have a hyphenation dictionary matching this locale
710 if (auto pHDInfo = getMatchingDict(aLocale))
712 // hyphenate the word with that dictionary
713 auto result = getHyphens(aWord, *pHDInfo, minLead, minTrail);
714 if (result.failed)
715 return nullptr;
717 sal_Int32 nHyphCount = 0;
719 // FIXME: shouldn't we iterate code points instead?
720 for (sal_Int32 i = 0; i < aWord.getLength(); i++)
722 if (result.hyphens[i] & 1)
723 nHyphCount++;
726 Sequence< sal_Int16 > aHyphPos(nHyphCount);
727 sal_Int16 *pPos = aHyphPos.getArray();
728 OUStringBuffer hyphenatedWordBuffer;
729 nHyphCount = 0;
731 for (sal_Int32 i = 0; i < aWord.getLength(); i++)
733 hyphenatedWordBuffer.append(aWord[i]);
734 // hyphenation position
735 if (result.hyphens[i] & 1)
737 // linguistic::PossibleHyphens is stuck with
738 // css::uno::Sequence<sal_Int16> because of
739 // css.linguistic2.XPossibleHyphens.getHyphenationPositions, so
740 // any further positions need to be ignored:
741 assert(i >= SAL_MIN_INT16);
742 if (i > SAL_MAX_INT16)
744 SAL_WARN(
745 "lingucomponent",
746 "hyphen pos " << i << " > SAL_MAX_INT16 in \"" << aWord
747 << "\"");
748 continue;
750 pPos[nHyphCount] = i;
751 hyphenatedWordBuffer.append('=');
752 nHyphCount++;
756 OUString hyphenatedWord = hyphenatedWordBuffer.makeStringAndClear();
758 return PossibleHyphens::CreatePossibleHyphens(
759 aWord, LinguLocaleToLanguage( aLocale ), hyphenatedWord, aHyphPos);
762 return nullptr;
765 sal_Bool SAL_CALL Hyphenator::addLinguServiceEventListener(
766 const Reference< XLinguServiceEventListener >& rxLstnr )
768 MutexGuard aGuard( GetLinguMutex() );
770 bool bRes = false;
771 if (!bDisposing && rxLstnr.is())
773 bRes = GetPropHelper().addLinguServiceEventListener( rxLstnr );
775 return bRes;
778 sal_Bool SAL_CALL Hyphenator::removeLinguServiceEventListener(
779 const Reference< XLinguServiceEventListener >& rxLstnr )
781 MutexGuard aGuard( GetLinguMutex() );
783 bool bRes = false;
784 if (!bDisposing && rxLstnr.is())
786 bRes = GetPropHelper().removeLinguServiceEventListener( rxLstnr );
788 return bRes;
791 OUString SAL_CALL Hyphenator::getServiceDisplayName(const Locale& rLocale)
793 std::locale loc(Translate::Create("svt", LanguageTag(rLocale)));
794 return Translate::get(STR_DESCRIPTION_LIBHYPHEN, loc);
797 void SAL_CALL Hyphenator::initialize( const Sequence< Any >& rArguments )
799 MutexGuard aGuard( GetLinguMutex() );
801 if (pPropHelper)
802 return;
804 sal_Int32 nLen = rArguments.getLength();
805 if (2 == nLen)
807 Reference< XLinguProperties > xPropSet;
808 rArguments.getConstArray()[0] >>= xPropSet;
809 // rArguments.getConstArray()[1] >>= xDicList;
811 //! Pointer allows for access of the non-UNO functions.
812 //! And the reference to the UNO-functions while increasing
813 //! the ref-count and will implicitly free the memory
814 //! when the object is no longer used.
815 pPropHelper.reset( new PropertyHelper_Hyphenation( static_cast<XHyphenator *>(this), xPropSet ) );
816 pPropHelper->AddAsPropListener(); //! after a reference is established
818 else {
819 OSL_FAIL( "wrong number of arguments in sequence" );
823 void SAL_CALL Hyphenator::dispose()
825 MutexGuard aGuard( GetLinguMutex() );
827 if (!bDisposing)
829 bDisposing = true;
830 EventObject aEvtObj( static_cast<XHyphenator *>(this) );
831 aEvtListeners.disposeAndClear( aEvtObj );
832 if (pPropHelper)
834 pPropHelper->RemoveAsPropListener();
835 pPropHelper.reset();
840 void SAL_CALL Hyphenator::addEventListener( const Reference< XEventListener >& rxListener )
842 MutexGuard aGuard( GetLinguMutex() );
844 if (!bDisposing && rxListener.is())
845 aEvtListeners.addInterface( rxListener );
848 void SAL_CALL Hyphenator::removeEventListener( const Reference< XEventListener >& rxListener )
850 MutexGuard aGuard( GetLinguMutex() );
852 if (!bDisposing && rxListener.is())
853 aEvtListeners.removeInterface( rxListener );
856 // Service specific part
857 OUString SAL_CALL Hyphenator::getImplementationName()
859 return u"org.openoffice.lingu.LibHnjHyphenator"_ustr;
862 sal_Bool SAL_CALL Hyphenator::supportsService( const OUString& ServiceName )
864 return cppu::supportsService(this, ServiceName);
867 Sequence< OUString > SAL_CALL Hyphenator::getSupportedServiceNames()
869 return { SN_HYPHENATOR };
872 extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
873 lingucomponent_Hyphenator_get_implementation(
874 css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&)
876 return cppu::acquire(new Hyphenator());
880 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */