merged tag ooo/OOO330_m14
[LibreOffice.git] / i18npool / source / search / textsearch.cxx
blobdea4eca2a618b8fcc3b611a0cd2eb264f868438e
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2000, 2010 Oracle and/or its affiliates.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * This file is part of OpenOffice.org.
11 * OpenOffice.org is free software: you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser General Public License version 3
13 * only, as published by the Free Software Foundation.
15 * OpenOffice.org is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License version 3 for more details
19 * (a copy is included in the LICENSE file that accompanied this code).
21 * You should have received a copy of the GNU Lesser General Public License
22 * version 3 along with OpenOffice.org. If not, see
23 * <http://www.openoffice.org/license.html>
24 * for a copy of the LGPLv3 License.
26 ************************************************************************/
28 // MARKER(update_precomp.py): autogen include statement, do not remove
29 #include "precompiled_i18npool.hxx"
31 #include "textsearch.hxx"
32 #include "levdis.hxx"
33 #include <regexp/reclass.hxx>
34 #include <com/sun/star/lang/Locale.hpp>
35 #include <com/sun/star/lang/XMultiServiceFactory.hpp>
36 #include <comphelper/processfactory.hxx>
37 #include <com/sun/star/i18n/UnicodeType.hpp>
38 #include <com/sun/star/util/SearchFlags.hpp>
39 #include <com/sun/star/i18n/WordType.hpp>
40 #include <com/sun/star/i18n/ScriptType.hpp>
41 #include <com/sun/star/i18n/CharacterIteratorMode.hpp>
42 #include <com/sun/star/i18n/KCharacterType.hpp>
43 #include <com/sun/star/registry/XRegistryKey.hpp>
44 #include <cppuhelper/factory.hxx>
45 #include <cppuhelper/weak.hxx>
47 #ifdef _MSC_VER
48 // get rid of that dumb compiler warning
49 // identifier was truncated to '255' characters in the debug information
50 // for STL template usage, if .pdb files are to be created
51 #pragma warning( disable: 4786 )
52 #endif
54 #include <string.h>
56 using namespace ::com::sun::star::util;
57 using namespace ::com::sun::star::uno;
58 using namespace ::com::sun::star::lang;
59 using namespace ::com::sun::star::i18n;
60 using namespace ::rtl;
62 static sal_Int32 COMPLEX_TRANS_MASK_TMP =
63 TransliterationModules_ignoreBaFa_ja_JP |
64 TransliterationModules_ignoreIterationMark_ja_JP |
65 TransliterationModules_ignoreTiJi_ja_JP |
66 TransliterationModules_ignoreHyuByu_ja_JP |
67 TransliterationModules_ignoreSeZe_ja_JP |
68 TransliterationModules_ignoreIandEfollowedByYa_ja_JP |
69 TransliterationModules_ignoreKiKuFollowedBySa_ja_JP |
70 TransliterationModules_ignoreProlongedSoundMark_ja_JP;
71 static const sal_Int32 SIMPLE_TRANS_MASK = 0xffffffff ^ COMPLEX_TRANS_MASK_TMP;
72 static const sal_Int32 COMPLEX_TRANS_MASK =
73 COMPLEX_TRANS_MASK_TMP |
74 TransliterationModules_IGNORE_KANA |
75 TransliterationModules_IGNORE_WIDTH;
76 // Above 2 transliteration is simple but need to take effect in
77 // complex transliteration
79 TextSearch::TextSearch(const Reference < XMultiServiceFactory > & rxMSF)
80 : xMSF( rxMSF )
81 , pJumpTable( 0 )
82 , pJumpTable2( 0 )
83 , pRegExp( 0 )
84 , pWLD( 0 )
86 SearchOptions aOpt;
87 aOpt.algorithmType = SearchAlgorithms_ABSOLUTE;
88 aOpt.searchFlag = SearchFlags::ALL_IGNORE_CASE;
89 //aOpt.Locale = ???;
90 setOptions( aOpt );
93 TextSearch::~TextSearch()
95 delete pRegExp;
96 delete pWLD;
97 delete pJumpTable;
98 delete pJumpTable2;
101 void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeException )
103 aSrchPara = rOptions;
105 delete pRegExp, pRegExp = 0;
106 delete pWLD, pWLD = 0;
107 delete pJumpTable, pJumpTable = 0;
108 delete pJumpTable2, pJumpTable2 = 0;
110 // Create Transliteration class
111 if( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK )
113 if( !xTranslit.is() )
115 Reference < XInterface > xI = xMSF->createInstance(
116 OUString::createFromAscii(
117 "com.sun.star.i18n.Transliteration"));
118 if ( xI.is() )
119 xI->queryInterface( ::getCppuType(
120 (const Reference< XExtendedTransliteration >*)0))
121 >>= xTranslit;
123 // Load transliteration module
124 if( xTranslit.is() )
125 xTranslit->loadModule(
126 (TransliterationModules)( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ),
127 aSrchPara.Locale);
129 else if( xTranslit.is() )
130 xTranslit = 0;
132 // Create Transliteration for 2<->1, 2<->2 transliteration
133 if ( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK )
135 if( !xTranslit2.is() )
137 Reference < XInterface > xI = xMSF->createInstance(
138 OUString::createFromAscii(
139 "com.sun.star.i18n.Transliteration"));
140 if ( xI.is() )
141 xI->queryInterface( ::getCppuType(
142 (const Reference< XExtendedTransliteration >*)0))
143 >>= xTranslit2;
145 // Load transliteration module
146 if( xTranslit2.is() )
147 xTranslit2->loadModule(
148 (TransliterationModules)( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ),
149 aSrchPara.Locale);
152 if ( !xBreak.is() )
154 Reference < XInterface > xI = xMSF->createInstance(
155 OUString::createFromAscii( "com.sun.star.i18n.BreakIterator"));
156 if( xI.is() )
157 xI->queryInterface( ::getCppuType(
158 (const Reference< XBreakIterator >*)0))
159 >>= xBreak;
162 sSrchStr = aSrchPara.searchString;
164 // use transliteration here, but only if not RegEx, which does it different
165 if ( aSrchPara.algorithmType != SearchAlgorithms_REGEXP && xTranslit.is() &&
166 aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK )
167 sSrchStr = xTranslit->transliterateString2String(
168 aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
170 if ( aSrchPara.algorithmType != SearchAlgorithms_REGEXP && xTranslit2.is() &&
171 aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK )
172 sSrchStr2 = xTranslit2->transliterateString2String(
173 aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
175 // When start or end of search string is a complex script type, we need to
176 // make sure the result boundary is not located in the middle of cell.
177 checkCTLStart = (xBreak.is() && (xBreak->getScriptType(sSrchStr, 0) ==
178 ScriptType::COMPLEX));
179 checkCTLEnd = (xBreak.is() && (xBreak->getScriptType(sSrchStr,
180 sSrchStr.getLength()-1) == ScriptType::COMPLEX));
182 if ( aSrchPara.algorithmType == SearchAlgorithms_REGEXP )
184 fnForward = &TextSearch::RESrchFrwrd;
185 fnBackward = &TextSearch::RESrchBkwrd;
187 pRegExp = new Regexpr( aSrchPara, xTranslit );
189 else
191 if ( aSrchPara.algorithmType == SearchAlgorithms_APPROXIMATE )
193 fnForward = &TextSearch::ApproxSrchFrwrd;
194 fnBackward = &TextSearch::ApproxSrchBkwrd;
196 pWLD = new WLevDistance( sSrchStr.getStr(), aSrchPara.changedChars,
197 aSrchPara.insertedChars, aSrchPara.deletedChars,
198 0 != (SearchFlags::LEV_RELAXED & aSrchPara.searchFlag ) );
200 nLimit = pWLD->GetLimit();
202 else
204 fnForward = &TextSearch::NSrchFrwrd;
205 fnBackward = &TextSearch::NSrchBkwrd;
210 sal_Int32 FindPosInSeq_Impl( const Sequence <sal_Int32>& rOff, sal_Int32 nPos )
212 sal_Int32 nRet = 0, nEnd = rOff.getLength();
213 while( nRet < nEnd && nPos > rOff[ nRet ] ) ++nRet;
214 return nRet;
217 sal_Bool TextSearch::isCellStart(const OUString& searchStr, sal_Int32 nPos)
218 throw( RuntimeException )
220 sal_Int32 nDone;
221 return nPos == xBreak->previousCharacters(searchStr, nPos+1,
222 aSrchPara.Locale, CharacterIteratorMode::SKIPCELL, 1, nDone);
225 SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
226 throw( RuntimeException )
228 SearchResult sres;
230 OUString in_str(searchStr);
231 sal_Int32 newStartPos = startPos;
232 sal_Int32 newEndPos = endPos;
234 bUsePrimarySrchStr = true;
236 if ( xTranslit.is() )
238 // apply normal transliteration (1<->1, 1<->0)
239 com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
240 in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset );
242 // JP 20.6.2001: also the start and end positions must be corrected!
243 if( startPos )
244 newStartPos = FindPosInSeq_Impl( offset, startPos );
246 if( endPos < searchStr.getLength() )
247 newEndPos = FindPosInSeq_Impl( offset, endPos );
248 else
249 newEndPos = in_str.getLength();
251 sres = (this->*fnForward)( in_str, newStartPos, newEndPos );
253 for ( int k = 0; k < sres.startOffset.getLength(); k++ )
255 if (sres.startOffset[k])
256 sres.startOffset[k] = offset[sres.startOffset[k]];
257 // JP 20.6.2001: end is ever exclusive and then don't return
258 // the position of the next character - return the
259 // next position behind the last found character!
260 // "a b c" find "b" must return 2,3 and not 2,4!!!
261 if (sres.endOffset[k])
262 sres.endOffset[k] = offset[sres.endOffset[k]-1] + 1;
265 else
267 sres = (this->*fnForward)( in_str, startPos, endPos );
270 if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP)
272 SearchResult sres2;
274 in_str = OUString(searchStr);
275 com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
277 in_str = xTranslit2->transliterate( searchStr, 0, in_str.getLength(), offset );
279 if( startPos )
280 startPos = FindPosInSeq_Impl( offset, startPos );
282 if( endPos < searchStr.getLength() )
283 endPos = FindPosInSeq_Impl( offset, endPos );
284 else
285 endPos = in_str.getLength();
287 bUsePrimarySrchStr = false;
288 sres2 = (this->*fnForward)( in_str, startPos, endPos );
290 for ( int k = 0; k < sres2.startOffset.getLength(); k++ )
292 if (sres2.startOffset[k])
293 sres2.startOffset[k] = offset[sres2.startOffset[k]-1] + 1;
294 if (sres2.endOffset[k])
295 sres2.endOffset[k] = offset[sres2.endOffset[k]-1] + 1;
298 // pick first and long one
299 if ( sres.subRegExpressions == 0)
300 return sres2;
301 if ( sres2.subRegExpressions == 1)
303 if ( sres.startOffset[0] > sres2.startOffset[0])
304 return sres2;
305 else if ( sres.startOffset[0] == sres2.startOffset[0] &&
306 sres.endOffset[0] < sres2.endOffset[0])
307 return sres2;
311 return sres;
314 SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
315 throw(RuntimeException)
317 SearchResult sres;
319 OUString in_str(searchStr);
320 sal_Int32 newStartPos = startPos;
321 sal_Int32 newEndPos = endPos;
323 bUsePrimarySrchStr = true;
325 if ( xTranslit.is() )
327 // apply only simple 1<->1 transliteration here
328 com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
329 in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset );
331 // JP 20.6.2001: also the start and end positions must be corrected!
332 if( startPos < searchStr.getLength() )
333 newStartPos = FindPosInSeq_Impl( offset, startPos );
334 else
335 newStartPos = in_str.getLength();
337 if( endPos )
338 newEndPos = FindPosInSeq_Impl( offset, endPos );
340 sres = (this->*fnBackward)( in_str, newStartPos, newEndPos );
342 for ( int k = 0; k < sres.startOffset.getLength(); k++ )
344 if (sres.startOffset[k])
345 sres.startOffset[k] = offset[sres.startOffset[k] - 1] + 1;
346 // JP 20.6.2001: end is ever exclusive and then don't return
347 // the position of the next character - return the
348 // next position behind the last found character!
349 // "a b c" find "b" must return 2,3 and not 2,4!!!
350 if (sres.endOffset[k])
351 sres.endOffset[k] = offset[sres.endOffset[k]];
354 else
356 sres = (this->*fnBackward)( in_str, startPos, endPos );
359 if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP )
361 SearchResult sres2;
363 in_str = OUString(searchStr);
364 com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
366 in_str = xTranslit2->transliterate(searchStr, 0, in_str.getLength(), offset);
368 if( startPos < searchStr.getLength() )
369 startPos = FindPosInSeq_Impl( offset, startPos );
370 else
371 startPos = in_str.getLength();
373 if( endPos )
374 endPos = FindPosInSeq_Impl( offset, endPos );
376 bUsePrimarySrchStr = false;
377 sres2 = (this->*fnBackward)( in_str, startPos, endPos );
379 for( int k = 0; k < sres2.startOffset.getLength(); k++ )
381 if (sres2.startOffset[k])
382 sres2.startOffset[k] = offset[sres2.startOffset[k]-1]+1;
383 if (sres2.endOffset[k])
384 sres2.endOffset[k] = offset[sres2.endOffset[k]-1]+1;
387 // pick last and long one
388 if ( sres.subRegExpressions == 0 )
389 return sres2;
390 if ( sres2.subRegExpressions == 1 )
392 if ( sres.startOffset[0] < sres2.startOffset[0] )
393 return sres2;
394 if ( sres.startOffset[0] == sres2.startOffset[0] &&
395 sres.endOffset[0] > sres2.endOffset[0] )
396 return sres2;
400 return sres;
405 //--------------- die Wort-Trennner ----------------------------------
407 bool TextSearch::IsDelimiter( const OUString& rStr, sal_Int32 nPos ) const
409 bool bRet = 1;
410 if( '\x7f' != rStr[nPos])
412 if ( !xCharClass.is() )
414 Reference < XInterface > xI = xMSF->createInstance(
415 OUString::createFromAscii( "com.sun.star.i18n.CharacterClassification"));
416 if( xI.is() )
417 xI->queryInterface( ::getCppuType(
418 (const Reference< XCharacterClassification >*)0))
419 >>= xCharClass;
421 if ( xCharClass.is() )
423 sal_Int32 nCType = xCharClass->getCharacterType( rStr, nPos,
424 aSrchPara.Locale );
425 if( 0 != (( KCharacterType::DIGIT | KCharacterType::ALPHA |
426 KCharacterType::LETTER ) & nCType ) )
427 bRet = 0;
430 return bRet;
435 // --------- methods for the kind of boyer-morre search ------------------
438 void TextSearch::MakeForwardTab()
440 // create the jumptable for the search text
441 if( pJumpTable )
443 if( bIsForwardTab )
444 return ; // the jumpTable is ok
445 delete pJumpTable;
447 bIsForwardTab = true;
449 sal_Int32 n, nLen = sSrchStr.getLength();
450 pJumpTable = new TextSearchJumpTable;
452 for( n = 0; n < nLen - 1; ++n )
454 sal_Unicode cCh = sSrchStr[n];
455 sal_Int32 nDiff = nLen - n - 1;
456 TextSearchJumpTable::value_type aEntry( cCh, nDiff );
458 ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
459 pJumpTable->insert( aEntry );
460 if ( !aPair.second )
461 (*(aPair.first)).second = nDiff;
465 void TextSearch::MakeForwardTab2()
467 // create the jumptable for the search text
468 if( pJumpTable2 )
470 if( bIsForwardTab )
471 return ; // the jumpTable is ok
472 delete pJumpTable2;
474 bIsForwardTab = true;
476 sal_Int32 n, nLen = sSrchStr2.getLength();
477 pJumpTable2 = new TextSearchJumpTable;
479 for( n = 0; n < nLen - 1; ++n )
481 sal_Unicode cCh = sSrchStr2[n];
482 sal_Int32 nDiff = nLen - n - 1;
484 TextSearchJumpTable::value_type aEntry( cCh, nDiff );
485 ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
486 pJumpTable2->insert( aEntry );
487 if ( !aPair.second )
488 (*(aPair.first)).second = nDiff;
492 void TextSearch::MakeBackwardTab()
494 // create the jumptable for the search text
495 if( pJumpTable )
497 if( !bIsForwardTab )
498 return ; // the jumpTable is ok
499 delete pJumpTable;
501 bIsForwardTab = false;
503 sal_Int32 n, nLen = sSrchStr.getLength();
504 pJumpTable = new TextSearchJumpTable;
506 for( n = nLen-1; n > 0; --n )
508 sal_Unicode cCh = sSrchStr[n];
509 TextSearchJumpTable::value_type aEntry( cCh, n );
510 ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
511 pJumpTable->insert( aEntry );
512 if ( !aPair.second )
513 (*(aPair.first)).second = n;
517 void TextSearch::MakeBackwardTab2()
519 // create the jumptable for the search text
520 if( pJumpTable2 )
522 if( !bIsForwardTab )
523 return ; // the jumpTable is ok
524 delete pJumpTable2;
526 bIsForwardTab = false;
528 sal_Int32 n, nLen = sSrchStr2.getLength();
529 pJumpTable2 = new TextSearchJumpTable;
531 for( n = nLen-1; n > 0; --n )
533 sal_Unicode cCh = sSrchStr2[n];
534 TextSearchJumpTable::value_type aEntry( cCh, n );
535 ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
536 pJumpTable2->insert( aEntry );
537 if ( !aPair.second )
538 (*(aPair.first)).second = n;
542 sal_Int32 TextSearch::GetDiff( const sal_Unicode cChr ) const
544 TextSearchJumpTable *pJump;
545 OUString sSearchKey;
547 if ( bUsePrimarySrchStr ) {
548 pJump = pJumpTable;
549 sSearchKey = sSrchStr;
550 } else {
551 pJump = pJumpTable2;
552 sSearchKey = sSrchStr2;
555 TextSearchJumpTable::const_iterator iLook = pJump->find( cChr );
556 if ( iLook == pJump->end() )
557 return sSearchKey.getLength();
558 return (*iLook).second;
562 // TextSearch::NSrchFrwrd is mis-optimized on unxsoli (#i105945#)
563 SearchResult TextSearch::NSrchFrwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
564 throw(RuntimeException)
566 SearchResult aRet;
567 aRet.subRegExpressions = 0;
569 OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2;
571 OUString aStr( searchStr );
572 sal_Int32 nSuchIdx = aStr.getLength();
573 sal_Int32 nEnde = endPos;
574 if( !nSuchIdx || !sSearchKey.getLength() || sSearchKey.getLength() > nSuchIdx )
575 return aRet;
578 if( nEnde < sSearchKey.getLength() ) // position inside the search region ?
579 return aRet;
581 nEnde -= sSearchKey.getLength();
583 if (bUsePrimarySrchStr)
584 MakeForwardTab(); // create the jumptable
585 else
586 MakeForwardTab2();
588 for (sal_Int32 nCmpIdx = startPos; // start position for the search
589 nCmpIdx <= nEnde;
590 nCmpIdx += GetDiff( aStr[nCmpIdx + sSearchKey.getLength()-1]))
592 // if the match would be the completed cells, skip it.
593 if ( (checkCTLStart && !isCellStart( aStr, nCmpIdx )) || (checkCTLEnd
594 && !isCellStart( aStr, nCmpIdx + sSearchKey.getLength())) )
595 continue;
597 nSuchIdx = sSearchKey.getLength() - 1;
598 while( nSuchIdx >= 0 && sSearchKey[nSuchIdx] == aStr[nCmpIdx + nSuchIdx])
600 if( nSuchIdx == 0 )
602 if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag )
604 sal_Int32 nFndEnd = nCmpIdx + sSearchKey.getLength();
605 bool bAtStart = !nCmpIdx;
606 bool bAtEnd = nFndEnd == endPos;
607 bool bDelimBefore = bAtStart || IsDelimiter( aStr, nCmpIdx-1 );
608 bool bDelimBehind = IsDelimiter( aStr, nFndEnd );
609 // * 1 -> only one word in the paragraph
610 // * 2 -> at begin of paragraph
611 // * 3 -> at end of paragraph
612 // * 4 -> inside the paragraph
613 if( !( ( bAtStart && bAtEnd ) || // 1
614 ( bAtStart && bDelimBehind ) || // 2
615 ( bAtEnd && bDelimBefore ) || // 3
616 ( bDelimBefore && bDelimBehind ))) // 4
617 break;
620 aRet.subRegExpressions = 1;
621 aRet.startOffset.realloc( 1 );
622 aRet.startOffset[ 0 ] = nCmpIdx;
623 aRet.endOffset.realloc( 1 );
624 aRet.endOffset[ 0 ] = nCmpIdx + sSearchKey.getLength();
626 return aRet;
628 else
629 nSuchIdx--;
632 return aRet;
635 SearchResult TextSearch::NSrchBkwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
636 throw(RuntimeException)
638 SearchResult aRet;
639 aRet.subRegExpressions = 0;
641 OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2;
643 OUString aStr( searchStr );
644 sal_Int32 nSuchIdx = aStr.getLength();
645 sal_Int32 nEnde = endPos;
646 if( nSuchIdx == 0 || sSearchKey.getLength() == 0 || sSearchKey.getLength() > nSuchIdx)
647 return aRet;
649 if (bUsePrimarySrchStr)
650 MakeBackwardTab(); // create the jumptable
651 else
652 MakeBackwardTab2();
654 if( nEnde == nSuchIdx ) // end position for the search
655 nEnde = sSearchKey.getLength();
656 else
657 nEnde += sSearchKey.getLength();
659 sal_Int32 nCmpIdx = startPos; // start position for the search
661 while (nCmpIdx >= nEnde)
663 // if the match would be the completed cells, skip it.
664 if ( (!checkCTLStart || isCellStart( aStr, nCmpIdx -
665 sSearchKey.getLength() )) && (!checkCTLEnd ||
666 isCellStart( aStr, nCmpIdx)))
668 nSuchIdx = 0;
669 while( nSuchIdx < sSearchKey.getLength() && sSearchKey[nSuchIdx] ==
670 aStr[nCmpIdx + nSuchIdx - sSearchKey.getLength()] )
671 nSuchIdx++;
672 if( nSuchIdx >= sSearchKey.getLength() )
674 if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag )
676 sal_Int32 nFndStt = nCmpIdx - sSearchKey.getLength();
677 bool bAtStart = !nFndStt;
678 bool bAtEnd = nCmpIdx == startPos;
679 bool bDelimBehind = IsDelimiter( aStr, nCmpIdx );
680 bool bDelimBefore = bAtStart || // begin of paragraph
681 IsDelimiter( aStr, nFndStt-1 );
682 // * 1 -> only one word in the paragraph
683 // * 2 -> at begin of paragraph
684 // * 3 -> at end of paragraph
685 // * 4 -> inside the paragraph
686 if( ( bAtStart && bAtEnd ) || // 1
687 ( bAtStart && bDelimBehind ) || // 2
688 ( bAtEnd && bDelimBefore ) || // 3
689 ( bDelimBefore && bDelimBehind )) // 4
691 aRet.subRegExpressions = 1;
692 aRet.startOffset.realloc( 1 );
693 aRet.startOffset[ 0 ] = nCmpIdx;
694 aRet.endOffset.realloc( 1 );
695 aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength();
696 return aRet;
699 else
701 aRet.subRegExpressions = 1;
702 aRet.startOffset.realloc( 1 );
703 aRet.startOffset[ 0 ] = nCmpIdx;
704 aRet.endOffset.realloc( 1 );
705 aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength();
706 return aRet;
710 nSuchIdx = GetDiff( aStr[nCmpIdx - sSearchKey.getLength()] );
711 if( nCmpIdx < nSuchIdx )
712 return aRet;
713 nCmpIdx -= nSuchIdx;
715 return aRet;
720 //---------------------------------------------------------------------------
721 // ------- Methoden fuer die Suche ueber Regular-Expressions --------------
723 SearchResult TextSearch::RESrchFrwrd( const OUString& searchStr,
724 sal_Int32 startPos, sal_Int32 endPos )
725 throw(RuntimeException)
727 SearchResult aRet;
728 aRet.subRegExpressions = 0;
729 OUString aStr( searchStr );
731 bool bSearchInSel = (0 != (( SearchFlags::REG_NOT_BEGINOFLINE |
732 SearchFlags::REG_NOT_ENDOFLINE ) & aSrchPara.searchFlag ));
734 pRegExp->set_line(aStr.getStr(), bSearchInSel ? endPos : aStr.getLength());
736 struct re_registers regs;
738 // Clear structure
739 memset((void *)&regs, 0, sizeof(struct re_registers));
740 if ( ! pRegExp->re_search(&regs, startPos) )
742 if( regs.num_of_match > 0 &&
743 (regs.start[0] != -1 && regs.end[0] != -1) )
745 aRet.startOffset.realloc(regs.num_of_match);
746 aRet.endOffset.realloc(regs.num_of_match);
748 sal_Int32 i = 0, j = 0;
749 while( j < regs.num_of_match )
751 if( regs.start[j] != -1 && regs.end[j] != -1 )
753 aRet.startOffset[i] = regs.start[j];
754 aRet.endOffset[i] = regs.end[j];
755 ++i;
757 ++j;
759 aRet.subRegExpressions = i;
761 if ( regs.num_regs > 0 )
763 if ( regs.start )
764 free(regs.start);
765 if ( regs.end )
766 free(regs.end);
770 return aRet;
774 * Sucht das Muster aSrchPara.sSrchStr rueckwaerts im String rStr
776 SearchResult TextSearch::RESrchBkwrd( const OUString& searchStr,
777 sal_Int32 startPos, sal_Int32 endPos )
778 throw(RuntimeException)
780 SearchResult aRet;
781 aRet.subRegExpressions = 0;
782 OUString aStr( searchStr );
784 sal_Int32 nOffset = 0;
785 sal_Int32 nStrEnde = aStr.getLength() == endPos ? 0 : endPos;
787 bool bSearchInSel = (0 != (( SearchFlags::REG_NOT_BEGINOFLINE |
788 SearchFlags::REG_NOT_ENDOFLINE ) & aSrchPara.searchFlag ));
790 if( startPos )
791 nOffset = startPos - 1;
793 // search only in the subString
794 if( bSearchInSel && nStrEnde )
796 aStr = aStr.copy( nStrEnde, aStr.getLength() - nStrEnde );
797 if( nOffset > nStrEnde )
798 nOffset = nOffset - nStrEnde;
799 else
800 nOffset = 0;
803 // set the length to negative for reverse search
804 pRegExp->set_line( aStr.getStr(), -(aStr.getLength()) );
805 struct re_registers regs;
807 // Clear structure
808 memset((void *)&regs, 0, sizeof(struct re_registers));
809 if ( ! pRegExp->re_search(&regs, nOffset) )
811 if( regs.num_of_match > 0 &&
812 (regs.start[0] != -1 && regs.end[0] != -1) )
814 nOffset = bSearchInSel ? nStrEnde : 0;
815 aRet.startOffset.realloc(regs.num_of_match);
816 aRet.endOffset.realloc(regs.num_of_match);
818 sal_Int32 i = 0, j = 0;
819 while( j < regs.num_of_match )
821 if( regs.start[j] != -1 && regs.end[j] != -1 )
823 aRet.startOffset[i] = regs.end[j] + nOffset;
824 aRet.endOffset[i] = regs.start[j] + nOffset;
825 ++i;
827 ++j;
829 aRet.subRegExpressions = i;
831 if ( regs.num_regs > 0 )
833 if ( regs.start )
834 free(regs.start);
835 if ( regs.end )
836 free(regs.end);
840 return aRet;
843 // Phonetische Suche von Worten
844 SearchResult TextSearch::ApproxSrchFrwrd( const OUString& searchStr,
845 sal_Int32 startPos, sal_Int32 endPos )
846 throw(RuntimeException)
848 SearchResult aRet;
849 aRet.subRegExpressions = 0;
851 if( !xBreak.is() )
852 return aRet;
854 OUString aWTemp( searchStr );
856 register sal_Int32 nStt, nEnd;
858 Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos,
859 aSrchPara.Locale,
860 WordType::ANYWORD_IGNOREWHITESPACES, sal_True );
864 if( aWBnd.startPos >= endPos )
865 break;
866 nStt = aWBnd.startPos < startPos ? startPos : aWBnd.startPos;
867 nEnd = aWBnd.endPos > endPos ? endPos : aWBnd.endPos;
869 if( nStt < nEnd &&
870 pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit )
872 aRet.subRegExpressions = 1;
873 aRet.startOffset.realloc( 1 );
874 aRet.startOffset[ 0 ] = nStt;
875 aRet.endOffset.realloc( 1 );
876 aRet.endOffset[ 0 ] = nEnd;
877 break;
880 nStt = nEnd - 1;
881 aWBnd = xBreak->nextWord( aWTemp, nStt, aSrchPara.Locale,
882 WordType::ANYWORD_IGNOREWHITESPACES);
883 } while( aWBnd.startPos != aWBnd.endPos ||
884 (aWBnd.endPos != aWTemp.getLength() && aWBnd.endPos != nEnd) );
885 // #i50244# aWBnd.endPos != nEnd : in case there is _no_ word (only
886 // whitespace) in searchStr, getWordBoundary() returned startPos,startPos
887 // and nextWord() does also => don't loop forever.
888 return aRet;
891 SearchResult TextSearch::ApproxSrchBkwrd( const OUString& searchStr,
892 sal_Int32 startPos, sal_Int32 endPos )
893 throw(RuntimeException)
895 SearchResult aRet;
896 aRet.subRegExpressions = 0;
898 if( !xBreak.is() )
899 return aRet;
901 OUString aWTemp( searchStr );
903 register sal_Int32 nStt, nEnd;
905 Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos,
906 aSrchPara.Locale,
907 WordType::ANYWORD_IGNOREWHITESPACES, sal_True );
911 if( aWBnd.endPos <= endPos )
912 break;
913 nStt = aWBnd.startPos < endPos ? endPos : aWBnd.startPos;
914 nEnd = aWBnd.endPos > startPos ? startPos : aWBnd.endPos;
916 if( nStt < nEnd &&
917 pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit )
919 aRet.subRegExpressions = 1;
920 aRet.startOffset.realloc( 1 );
921 aRet.startOffset[ 0 ] = nEnd;
922 aRet.endOffset.realloc( 1 );
923 aRet.endOffset[ 0 ] = nStt;
924 break;
926 if( !nStt )
927 break;
929 aWBnd = xBreak->previousWord( aWTemp, nStt, aSrchPara.Locale,
930 WordType::ANYWORD_IGNOREWHITESPACES);
931 } while( aWBnd.startPos != aWBnd.endPos || aWBnd.endPos != aWTemp.getLength() );
932 return aRet;
936 static const sal_Char cSearchName[] = "com.sun.star.util.TextSearch";
937 static const sal_Char cSearchImpl[] = "com.sun.star.util.TextSearch_i18n";
939 static OUString getServiceName_Static()
941 return OUString::createFromAscii( cSearchName );
944 static OUString getImplementationName_Static()
946 return OUString::createFromAscii( cSearchImpl );
949 OUString SAL_CALL
950 TextSearch::getImplementationName()
951 throw( RuntimeException )
953 return getImplementationName_Static();
956 sal_Bool SAL_CALL
957 TextSearch::supportsService(const OUString& rServiceName)
958 throw( RuntimeException )
960 return !rServiceName.compareToAscii( cSearchName );
963 Sequence< OUString > SAL_CALL
964 TextSearch::getSupportedServiceNames(void) throw( RuntimeException )
966 Sequence< OUString > aRet(1);
967 aRet[0] = getServiceName_Static();
968 return aRet;
971 ::com::sun::star::uno::Reference< ::com::sun::star::uno::XInterface >
972 SAL_CALL TextSearch_CreateInstance(
973 const ::com::sun::star::uno::Reference<
974 ::com::sun::star::lang::XMultiServiceFactory >& rxMSF )
976 return ::com::sun::star::uno::Reference<
977 ::com::sun::star::uno::XInterface >(
978 (::cppu::OWeakObject*) new TextSearch( rxMSF ) );
981 extern "C"
984 void SAL_CALL component_getImplementationEnvironment(
985 const sal_Char** ppEnvTypeName, uno_Environment** /*ppEnv*/ )
987 *ppEnvTypeName = CPPU_CURRENT_LANGUAGE_BINDING_NAME;
990 sal_Bool SAL_CALL component_writeInfo(
991 void* /*_pServiceManager*/, void* _pRegistryKey )
993 if (_pRegistryKey)
995 ::com::sun::star::registry::XRegistryKey * pRegistryKey =
996 reinterpret_cast< ::com::sun::star::registry::XRegistryKey* >(
997 _pRegistryKey );
998 ::com::sun::star::uno::Reference<
999 ::com::sun::star::registry::XRegistryKey > xNewKey;
1001 xNewKey = pRegistryKey->createKey( getImplementationName_Static() );
1002 xNewKey = xNewKey->createKey(
1003 ::rtl::OUString::createFromAscii( "/UNO/SERVICES" ) );
1004 xNewKey->createKey( getServiceName_Static() );
1006 return sal_True;
1009 void* SAL_CALL component_getFactory( const sal_Char* sImplementationName,
1010 void* _pServiceManager, void* /*_pRegistryKey*/ )
1012 void* pRet = NULL;
1014 ::com::sun::star::lang::XMultiServiceFactory* pServiceManager =
1015 reinterpret_cast< ::com::sun::star::lang::XMultiServiceFactory* >
1016 ( _pServiceManager );
1017 ::com::sun::star::uno::Reference<
1018 ::com::sun::star::lang::XSingleServiceFactory > xFactory;
1020 if ( 0 == rtl_str_compare( sImplementationName, cSearchImpl) )
1022 ::com::sun::star::uno::Sequence< ::rtl::OUString > aServiceNames(1);
1023 aServiceNames[0] = getServiceName_Static();
1024 xFactory = ::cppu::createSingleFactory(
1025 pServiceManager, getImplementationName_Static(),
1026 &TextSearch_CreateInstance, aServiceNames );
1029 if ( xFactory.is() )
1031 xFactory->acquire();
1032 pRet = xFactory.get();
1035 return pRet;
1038 } // extern "C"