Version 7.6.3.2-android, tag libreoffice-7.6.3.2-android
[LibreOffice.git] / unotools / source / i18n / textsearch.cxx
blobb7f104895f8c82dcd91dfe94c452591137391e44
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <sal/config.h>
22 #include <cstdlib>
23 #include <string_view>
25 #include <i18nlangtag/languagetag.hxx>
26 #include <i18nutil/searchopt.hxx>
27 #include <i18nutil/transliteration.hxx>
28 #include <com/sun/star/util/TextSearch2.hpp>
29 #include <com/sun/star/util/SearchAlgorithms2.hpp>
30 #include <com/sun/star/util/SearchFlags.hpp>
31 #include <unotools/charclass.hxx>
32 #include <comphelper/processfactory.hxx>
33 #include <unotools/textsearch.hxx>
34 #include <rtl/ustrbuf.hxx>
35 #include <comphelper/diagnose_ex.hxx>
36 #include <mutex>
38 using namespace ::com::sun::star::util;
39 using namespace ::com::sun::star::uno;
40 using namespace ::com::sun::star::lang;
42 namespace utl
45 SearchParam::SearchParam( const OUString &rText,
46 SearchType eType,
47 bool bCaseSensitive,
48 sal_uInt32 cWildEscChar,
49 bool bWildMatchSel )
51 sSrchStr = rText;
52 m_eSrchType = eType;
54 m_cWildEscChar = cWildEscChar;
56 m_bCaseSense = bCaseSensitive;
57 m_bWildMatchSel = bWildMatchSel;
60 SearchParam::SearchParam( const SearchParam& rParam )
62 sSrchStr = rParam.sSrchStr;
63 m_eSrchType = rParam.m_eSrchType;
65 m_cWildEscChar = rParam.m_cWildEscChar;
67 m_bCaseSense = rParam.m_bCaseSense;
68 m_bWildMatchSel = rParam.m_bWildMatchSel;
71 SearchParam::~SearchParam() {}
73 static bool lcl_Equals( const i18nutil::SearchOptions2& rSO1, const i18nutil::SearchOptions2& rSO2 )
75 return
76 rSO1.AlgorithmType2 == rSO2.AlgorithmType2 &&
77 rSO1.WildcardEscapeCharacter == rSO2.WildcardEscapeCharacter &&
78 rSO1.algorithmType == rSO2.algorithmType &&
79 rSO1.searchFlag == rSO2.searchFlag &&
80 rSO1.searchString == rSO2.searchString &&
81 rSO1.replaceString == rSO2.replaceString &&
82 rSO1.changedChars == rSO2.changedChars &&
83 rSO1.deletedChars == rSO2.deletedChars &&
84 rSO1.insertedChars == rSO2.insertedChars &&
85 rSO1.Locale.Language == rSO2.Locale.Language &&
86 rSO1.Locale.Country == rSO2.Locale.Country &&
87 rSO1.Locale.Variant == rSO2.Locale.Variant &&
88 rSO1.transliterateFlags == rSO2.transliterateFlags;
91 namespace
93 struct CachedTextSearch
95 std::mutex mutex;
96 i18nutil::SearchOptions2 Options;
97 css::uno::Reference< css::util::XTextSearch2 > xTextSearch;
101 Reference<XTextSearch2> TextSearch::getXTextSearch( const i18nutil::SearchOptions2& rPara )
103 static CachedTextSearch theCachedTextSearch;
105 std::scoped_lock aGuard(theCachedTextSearch.mutex);
107 if ( lcl_Equals(theCachedTextSearch.Options, rPara) )
108 return theCachedTextSearch.xTextSearch;
110 Reference< XComponentContext > xContext = ::comphelper::getProcessComponentContext();
111 theCachedTextSearch.xTextSearch.set( ::TextSearch2::create(xContext) );
112 theCachedTextSearch.xTextSearch->setOptions2( rPara.toUnoSearchOptions2() );
113 theCachedTextSearch.Options = rPara;
115 return theCachedTextSearch.xTextSearch;
118 TextSearch::TextSearch(const SearchParam & rParam, LanguageType eLang )
120 if( LANGUAGE_NONE == eLang )
121 eLang = LANGUAGE_SYSTEM;
122 css::lang::Locale aLocale( LanguageTag::convertToLocale( eLang ) );
124 Init( rParam, aLocale);
127 TextSearch::TextSearch(const SearchParam & rParam, const CharClass& rCClass )
129 Init( rParam, rCClass.getLanguageTag().getLocale() );
132 TextSearch::TextSearch( const i18nutil::SearchOptions2& rPara )
134 xTextSearch = getXTextSearch( rPara );
137 i18nutil::SearchOptions2 TextSearch::UpgradeToSearchOptions2( const i18nutil::SearchOptions& rOptions )
139 sal_Int16 nAlgorithmType2;
140 switch (rOptions.algorithmType)
142 case SearchAlgorithms_REGEXP:
143 nAlgorithmType2 = SearchAlgorithms2::REGEXP;
144 break;
145 case SearchAlgorithms_APPROXIMATE:
146 nAlgorithmType2 = SearchAlgorithms2::APPROXIMATE;
147 break;
148 case SearchAlgorithms_ABSOLUTE:
149 nAlgorithmType2 = SearchAlgorithms2::ABSOLUTE;
150 break;
151 default:
152 for (;;) std::abort();
154 // It would be nice if an inherited struct had a ctor that takes an
155 // instance of the object the struct derived from...
156 i18nutil::SearchOptions2 aOptions2(
157 rOptions.algorithmType,
158 rOptions.searchFlag,
159 rOptions.searchString,
160 rOptions.replaceString,
161 rOptions.Locale,
162 rOptions.changedChars,
163 rOptions.deletedChars,
164 rOptions.insertedChars,
165 rOptions.transliterateFlags,
166 nAlgorithmType2,
167 0 // no wildcard search, no escape character...
169 return aOptions2;
172 void TextSearch::Init( const SearchParam & rParam,
173 const css::lang::Locale& rLocale )
175 // convert SearchParam to the UNO SearchOptions2
176 i18nutil::SearchOptions2 aSOpt;
178 switch( rParam.GetSrchType() )
180 case SearchParam::SearchType::Wildcard:
181 aSOpt.AlgorithmType2 = SearchAlgorithms2::WILDCARD;
182 aSOpt.algorithmType = SearchAlgorithms::SearchAlgorithms_MAKE_FIXED_SIZE; // no old enum for that
183 aSOpt.WildcardEscapeCharacter = rParam.GetWildEscChar();
184 if (rParam.IsWildMatchSel())
185 aSOpt.searchFlag |= SearchFlags::WILD_MATCH_SELECTION;
186 break;
188 case SearchParam::SearchType::Regexp:
189 aSOpt.AlgorithmType2 = SearchAlgorithms2::REGEXP;
190 aSOpt.algorithmType = SearchAlgorithms_REGEXP;
191 break;
193 case SearchParam::SearchType::Normal:
194 aSOpt.AlgorithmType2 = SearchAlgorithms2::ABSOLUTE;
195 aSOpt.algorithmType = SearchAlgorithms_ABSOLUTE;
196 break;
198 default:
199 for (;;) std::abort();
201 aSOpt.searchString = rParam.GetSrchStr();
202 aSOpt.replaceString = "";
203 aSOpt.Locale = rLocale;
204 aSOpt.transliterateFlags = TransliterationFlags::NONE;
205 if( !rParam.IsCaseSensitive() )
207 aSOpt.searchFlag |= SearchFlags::ALL_IGNORE_CASE;
208 aSOpt.transliterateFlags |= TransliterationFlags::IGNORE_CASE;
211 xTextSearch = getXTextSearch( aSOpt );
214 void TextSearch::SetLocale( const i18nutil::SearchOptions2& rOptions,
215 const css::lang::Locale& rLocale )
217 i18nutil::SearchOptions2 aSOpt( rOptions );
218 aSOpt.Locale = rLocale;
220 xTextSearch = getXTextSearch( aSOpt );
223 TextSearch::~TextSearch()
/*
 * General search methods. These methods forward to the corresponding
 * method of the underlying text search object, which performs the actual
 * ordinary string search or regular expression matching.
 */
232 bool TextSearch::SearchForward( const OUString &rStr,
233 sal_Int32* pStart, sal_Int32* pEnd,
234 css::util::SearchResult* pRes)
236 bool bRet = false;
239 if( xTextSearch.is() )
241 SearchResult aRet( xTextSearch->searchForward( rStr, *pStart, *pEnd ));
242 if( aRet.subRegExpressions > 0 )
244 bRet = true;
245 // the XTextsearch returns in startOffset the higher position
246 // and the endposition is always exclusive.
247 // The caller of this function will have in startPos the
248 // lower pos. and end
249 *pStart = aRet.startOffset[ 0 ];
250 *pEnd = aRet.endOffset[ 0 ];
251 if( pRes )
252 *pRes = aRet;
256 catch ( Exception& )
258 TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
260 return bRet;
263 bool TextSearch::searchForward( const OUString &rStr )
265 sal_Int32 pStart = 0;
266 sal_Int32 pEnd = rStr.getLength();
268 bool bResult = SearchForward(rStr, &pStart, &pEnd);
270 return bResult;
273 bool TextSearch::SearchBackward( const OUString & rStr, sal_Int32* pStart,
274 sal_Int32* pEnd, SearchResult* pRes )
276 bool bRet = false;
279 if( xTextSearch.is() )
281 SearchResult aRet( xTextSearch->searchBackward( rStr, *pStart, *pEnd ));
282 if( aRet.subRegExpressions )
284 bRet = true;
285 // the XTextsearch returns in startOffset the higher position
286 // and the endposition is always exclusive.
287 // The caller of this function will have in startPos the
288 // lower pos. and end
289 *pEnd = aRet.startOffset[ 0 ];
290 *pStart = aRet.endOffset[ 0 ];
291 if( pRes )
292 *pRes = aRet;
296 catch ( Exception& )
298 TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
300 return bRet;
303 void TextSearch::ReplaceBackReferences( OUString& rReplaceStr, std::u16string_view rStr, const SearchResult& rResult ) const
305 if( rResult.subRegExpressions <= 0 )
306 return;
308 sal_Unicode sFndChar;
309 sal_Int32 i;
310 OUStringBuffer sBuff(rReplaceStr.getLength()*4);
311 for(i = 0; i < rReplaceStr.getLength(); i++)
313 if( rReplaceStr[i] == '&')
315 sal_Int32 nStart = rResult.startOffset[0];
316 sal_Int32 nLength = rResult.endOffset[0] - rResult.startOffset[0];
317 sBuff.append(rStr.substr(nStart, nLength));
319 else if((i < rReplaceStr.getLength() - 1) && rReplaceStr[i] == '$')
321 sFndChar = rReplaceStr[ i + 1 ];
322 switch(sFndChar)
323 { // placeholder for a backward reference?
324 case '0':
325 case '1':
326 case '2':
327 case '3':
328 case '4':
329 case '5':
330 case '6':
331 case '7':
332 case '8':
333 case '9':
335 int j = sFndChar - '0'; // index
336 if(j < rResult.subRegExpressions)
338 sal_Int32 nSttReg = rResult.startOffset[j];
339 sal_Int32 nRegLen = rResult.endOffset[j];
340 if (nSttReg < 0 || nRegLen < 0) // A "not found" optional capture
342 nSttReg = nRegLen = 0; // Copy empty string
344 else if (nRegLen >= nSttReg)
346 nRegLen = nRegLen - nSttReg;
348 else
350 nRegLen = nSttReg - nRegLen;
351 nSttReg = rResult.endOffset[j];
353 // Copy reference from found string
354 sBuff.append(rStr.substr(nSttReg, nRegLen));
356 i += 1;
358 break;
359 default:
360 sBuff.append(OUStringChar(rReplaceStr[i]) + OUStringChar(rReplaceStr[i+1]));
361 i += 1;
362 break;
365 else if((i < rReplaceStr.getLength() - 1) && rReplaceStr[i] == '\\')
367 sFndChar = rReplaceStr[ i+1 ];
368 switch(sFndChar)
370 case '\\':
371 case '&':
372 case '$':
373 sBuff.append(sFndChar);
374 i+=1;
375 break;
376 case 't':
377 sBuff.append('\t');
378 i += 1;
379 break;
380 default:
381 sBuff.append(OUStringChar(rReplaceStr[i]) + OUStringChar(rReplaceStr[i+1]));
382 i += 1;
383 break;
386 else
388 sBuff.append(rReplaceStr[i]);
391 rReplaceStr = sBuff.makeStringAndClear();
394 } // namespace utl
396 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */