Updated core
[LibreOffice.git] / unotools / source / i18n / textsearch.cxx
blobdfb871e704393eeebee35c1efaa11100046dd7ae
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <i18nlangtag/languagetag.hxx>
21 #include <com/sun/star/lang/XMultiServiceFactory.hpp>
22 #include <com/sun/star/util/TextSearch.hpp>
23 #include <com/sun/star/util/SearchFlags.hpp>
24 #include <com/sun/star/i18n/TransliterationModules.hpp>
25 #include <unotools/charclass.hxx>
26 #include <comphelper/processfactory.hxx>
27 #include <unotools/textsearch.hxx>
28 #include <rtl/instance.hxx>
30 using namespace ::com::sun::star::util;
31 using namespace ::com::sun::star::uno;
32 using namespace ::com::sun::star::lang;
34 // ............................................................................
35 namespace utl
37 // ............................................................................
39 SearchParam::SearchParam( const OUString &rText,
40 SearchType eType,
41 sal_Bool bCaseSensitive,
42 sal_Bool bWrdOnly,
43 sal_Bool bSearchInSel )
45 sSrchStr = rText;
46 m_eSrchType = eType;
48 m_bWordOnly = bWrdOnly;
49 m_bSrchInSel = bSearchInSel;
50 m_bCaseSense = bCaseSensitive;
52 nTransliterationFlags = 0;
54 // Parameters for weighted Levenshtein distance
55 bLEV_Relaxed = sal_True;
56 nLEV_OtherX = 2;
57 nLEV_ShorterY = 1;
58 nLEV_LongerZ = 3;
61 SearchParam::SearchParam( const SearchParam& rParam )
63 sSrchStr = rParam.sSrchStr;
64 sReplaceStr = rParam.sReplaceStr;
65 m_eSrchType = rParam.m_eSrchType;
67 m_bWordOnly = rParam.m_bWordOnly;
68 m_bSrchInSel = rParam.m_bSrchInSel;
69 m_bCaseSense = rParam.m_bCaseSense;
71 bLEV_Relaxed = rParam.bLEV_Relaxed;
72 nLEV_OtherX = rParam.nLEV_OtherX;
73 nLEV_ShorterY = rParam.nLEV_ShorterY;
74 nLEV_LongerZ = rParam.nLEV_LongerZ;
76 nTransliterationFlags = rParam.nTransliterationFlags;
79 SearchParam::~SearchParam() {}
81 static bool lcl_Equals( const SearchOptions& rSO1, const SearchOptions& rSO2 )
83 return rSO1.algorithmType == rSO2.algorithmType &&
84 rSO1.searchFlag == rSO2.searchFlag &&
85 rSO1.searchString.equals(rSO2.searchString) &&
86 rSO1.replaceString.equals(rSO2.replaceString) &&
87 rSO1.changedChars == rSO2.changedChars &&
88 rSO1.deletedChars == rSO2.deletedChars &&
89 rSO1.insertedChars == rSO2.insertedChars &&
90 rSO1.Locale.Language == rSO2.Locale.Language &&
91 rSO1.Locale.Country == rSO2.Locale.Country &&
92 rSO1.Locale.Variant == rSO2.Locale.Variant &&
93 rSO1.transliterateFlags == rSO2.transliterateFlags;
96 namespace
98 struct CachedTextSearch
100 ::osl::Mutex mutex;
101 ::com::sun::star::util::SearchOptions Options;
102 ::com::sun::star::uno::Reference< ::com::sun::star::util::XTextSearch > xTextSearch;
105 struct theCachedTextSearch
106 : public rtl::Static< CachedTextSearch, theCachedTextSearch > {};
109 Reference<XTextSearch> TextSearch::getXTextSearch( const SearchOptions& rPara )
111 CachedTextSearch &rCache = theCachedTextSearch::get();
113 osl::MutexGuard aGuard(rCache.mutex);
115 if ( lcl_Equals(rCache.Options, rPara) )
116 return rCache.xTextSearch;
118 Reference< XComponentContext > xContext = ::comphelper::getProcessComponentContext();
119 rCache.xTextSearch.set( ::TextSearch::create(xContext) );
120 rCache.xTextSearch->setOptions( rPara );
121 rCache.Options = rPara;
123 return rCache.xTextSearch;
126 TextSearch::TextSearch(const SearchParam & rParam, LanguageType eLang )
128 if( LANGUAGE_NONE == eLang )
129 eLang = LANGUAGE_SYSTEM;
130 ::com::sun::star::lang::Locale aLocale( LanguageTag( eLang ).getLocale() );
132 Init( rParam, aLocale);
135 TextSearch::TextSearch(const SearchParam & rParam, const CharClass& rCClass )
137 Init( rParam, rCClass.getLanguageTag().getLocale() );
140 TextSearch::TextSearch( const SearchOptions& rPara )
142 xTextSearch = getXTextSearch( rPara );
145 void TextSearch::Init( const SearchParam & rParam,
146 const ::com::sun::star::lang::Locale& rLocale )
148 // convert SearchParam to the UNO SearchOptions
149 SearchOptions aSOpt;
151 switch( rParam.GetSrchType() )
153 case SearchParam::SRCH_REGEXP:
154 aSOpt.algorithmType = SearchAlgorithms_REGEXP;
155 if( rParam.IsSrchInSelection() )
156 aSOpt.searchFlag |= SearchFlags::REG_NOT_BEGINOFLINE |
157 SearchFlags::REG_NOT_ENDOFLINE;
158 break;
160 case SearchParam::SRCH_LEVDIST:
161 aSOpt.algorithmType = SearchAlgorithms_APPROXIMATE;
162 aSOpt.changedChars = rParam.GetLEVOther();
163 aSOpt.deletedChars = rParam.GetLEVLonger();
164 aSOpt.insertedChars = rParam.GetLEVShorter();
165 if( rParam.IsSrchRelaxed() )
166 aSOpt.searchFlag |= SearchFlags::LEV_RELAXED;
167 break;
169 // case SearchParam::SRCH_NORMAL:
170 default:
171 aSOpt.algorithmType = SearchAlgorithms_ABSOLUTE;
172 if( rParam.IsSrchWordOnly() )
173 aSOpt.searchFlag |= SearchFlags::NORM_WORD_ONLY;
174 break;
176 aSOpt.searchString = rParam.GetSrchStr();
177 aSOpt.replaceString = rParam.GetReplaceStr();
178 aSOpt.Locale = rLocale;
179 aSOpt.transliterateFlags = rParam.GetTransliterationFlags();
180 if( !rParam.IsCaseSensitive() )
182 aSOpt.searchFlag |= SearchFlags::ALL_IGNORE_CASE;
183 aSOpt.transliterateFlags |= ::com::sun::star::i18n::TransliterationModules_IGNORE_CASE;
186 xTextSearch = getXTextSearch( aSOpt );
189 void TextSearch::SetLocale( const ::com::sun::star::util::SearchOptions& rOptions,
190 const ::com::sun::star::lang::Locale& rLocale )
192 // convert SearchParam to the UNO SearchOptions
193 SearchOptions aSOpt( rOptions );
194 aSOpt.Locale = rLocale;
196 xTextSearch = getXTextSearch( aSOpt );
200 TextSearch::~TextSearch()
/*
 * General search methods. These methods call the respective
 * search implementation, such as ordinary string searching or
 * regular expression matching, through the configured text searcher.
 */
209 int TextSearch::SearchFrwrd( const String & rStr, xub_StrLen* pStart,
210 xub_StrLen* pEnde, SearchResult* pRes )
212 int nRet = 0;
215 if( xTextSearch.is() )
217 SearchResult aRet( xTextSearch->searchForward(
218 rStr, *pStart, *pEnde ));
219 if( aRet.subRegExpressions > 0 )
221 nRet = 1;
222 // the XTextsearch returns in startOffset the higher position
223 // and the endposition is always exclusive.
224 // The caller of this function will have in startPos the
225 // lower pos. and end
226 *pStart = (xub_StrLen)aRet.startOffset[ 0 ];
227 *pEnde = (xub_StrLen)aRet.endOffset[ 0 ];
228 if( pRes )
229 *pRes = aRet;
233 catch ( Exception& )
235 SAL_WARN( "unotools.i18n", "SearchForward: Exception caught!" );
237 return nRet;
240 sal_Bool TextSearch::SearchForward( const OUString &rStr,
241 sal_Int32* pStart, sal_Int32* pEnd,
242 ::com::sun::star::util::SearchResult* pRes)
244 sal_Bool nRet = sal_False;
247 if( xTextSearch.is() )
249 SearchResult aRet( xTextSearch->searchForward(
250 rStr, *pStart, *pEnd ));
251 if( aRet.subRegExpressions > 0 )
253 nRet = sal_True;
254 // the XTextsearch returns in startOffset the higher position
255 // and the endposition is always exclusive.
256 // The caller of this function will have in startPos the
257 // lower pos. and end
258 *pStart = aRet.startOffset[ 0 ];
259 *pEnd = aRet.endOffset[ 0 ];
260 if( pRes )
261 *pRes = aRet;
265 catch ( Exception& )
267 SAL_WARN( "unotools.i18n", "SearchForward: Exception caught!" );
269 return nRet;
273 int TextSearch::SearchBkwrd( const String & rStr, xub_StrLen* pStart,
274 xub_StrLen* pEnde, SearchResult* pRes )
276 int nRet = 0;
279 if( xTextSearch.is() )
281 SearchResult aRet( xTextSearch->searchBackward(
282 rStr, *pStart, *pEnde ));
283 if( aRet.subRegExpressions )
285 nRet = 1;
286 // the XTextsearch returns in startOffset the higher position
287 // and the endposition is always exclusive.
288 // The caller of this function will have in startPos the
289 // lower pos. and end
290 *pEnde = (xub_StrLen)aRet.startOffset[ 0 ];
291 *pStart = (xub_StrLen)aRet.endOffset[ 0 ];
292 if( pRes )
293 *pRes = aRet;
297 catch ( Exception& )
299 SAL_WARN( "unotools.i18n", "SearchBackward: Exception caught!" );
301 return nRet;
304 void TextSearch::ReplaceBackReferences( String& rReplaceStr, const String &rStr, const SearchResult& rResult )
306 if( rResult.subRegExpressions > 0 )
308 OUString sTab( '\t' );
309 sal_Unicode sSrchChrs[] = {'\\', '&', '$', 0};
310 String sTmp;
311 xub_StrLen nPos = 0;
312 sal_Unicode sFndChar;
313 while( STRING_NOTFOUND != ( nPos = rReplaceStr.SearchChar( sSrchChrs, nPos )) )
315 if( rReplaceStr.GetChar( nPos ) == '&')
317 sal_uInt16 nStart = (sal_uInt16)(rResult.startOffset[0]);
318 sal_uInt16 nLength = (sal_uInt16)(rResult.endOffset[0] - rResult.startOffset[0]);
319 rReplaceStr.Erase( nPos, 1 ); // delete ampersand
320 // replace by found string
321 rReplaceStr.Insert( rStr, nStart, nLength, nPos );
322 // jump over
323 nPos = nPos + nLength;
325 else if( rReplaceStr.GetChar( nPos ) == '$')
327 if( nPos + 1 < rReplaceStr.Len())
329 sFndChar = rReplaceStr.GetChar( nPos + 1 );
330 switch(sFndChar)
331 { // placeholder for a backward reference?
332 case '0':
333 case '1':
334 case '2':
335 case '3':
336 case '4':
337 case '5':
338 case '6':
339 case '7':
340 case '8':
341 case '9':
343 rReplaceStr.Erase( nPos, 2 ); // delete both
344 int i = sFndChar - '0'; // index
345 if(i < rResult.subRegExpressions)
347 sal_uInt16 nSttReg = (sal_uInt16)(rResult.startOffset[i]);
348 sal_uInt16 nRegLen = (sal_uInt16)(rResult.endOffset[i]);
349 if( nRegLen > nSttReg )
350 nRegLen = nRegLen - nSttReg;
351 else
353 nRegLen = nSttReg - nRegLen;
354 nSttReg = (sal_uInt16)(rResult.endOffset[i]);
356 // Copy reference from found string
357 sTmp = rStr.Copy((sal_uInt16)nSttReg, (sal_uInt16)nRegLen);
358 // insert
359 rReplaceStr.Insert( sTmp, nPos );
360 // and step over
361 nPos = nPos + sTmp.Len();
364 break;
365 default:
366 nPos += 2; // leave both chars unchanged
367 break;
370 else
371 ++nPos;
373 else
375 // at least another character?
376 if( nPos + 1 < rReplaceStr.Len())
378 sFndChar = rReplaceStr.GetChar( nPos + 1 );
379 switch(sFndChar)
381 case '\\':
382 case '&':
383 case '$':
384 rReplaceStr.Erase( nPos, 1 );
385 nPos++;
386 break;
387 case 't':
388 rReplaceStr.Erase( nPos, 2 ); // delete both
389 rReplaceStr.Insert( sTab, nPos ); // insert tabulator
390 nPos++; // step over
391 break;
392 default:
393 nPos += 2; // ignore both characters
394 break;
397 else
398 ++nPos;
404 // ............................................................................
405 } // namespace utl
406 // ............................................................................
408 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */