merge the formfield patch from ooo-build
[ooovba.git] / i18npool / source / search / textsearch.cxx
blob575c3b7f53320cef9094ec6f344d4bea6d2d714c
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: textsearch.cxx,v $
10 * $Revision: 1.12 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_i18npool.hxx"
34 #include "textsearch.hxx"
35 #include "levdis.hxx"
36 #include <regexp/reclass.hxx>
37 #include <com/sun/star/lang/Locale.hpp>
38 #include <com/sun/star/lang/XMultiServiceFactory.hpp>
39 #include <comphelper/processfactory.hxx>
40 #include <com/sun/star/i18n/UnicodeType.hpp>
41 #include <com/sun/star/util/SearchFlags.hpp>
42 #include <com/sun/star/i18n/WordType.hpp>
43 #include <com/sun/star/i18n/ScriptType.hpp>
44 #include <com/sun/star/i18n/CharacterIteratorMode.hpp>
45 #include <com/sun/star/i18n/KCharacterType.hpp>
46 #include <com/sun/star/registry/XRegistryKey.hpp>
47 #include <cppuhelper/factory.hxx>
48 #include <cppuhelper/weak.hxx>
50 #ifdef _MSC_VER
51 // get rid of that dumb compiler warning
52 // identifier was truncated to '255' characters in the debug information
53 // for STL template usage, if .pdb files are to be created
54 #pragma warning( disable: 4786 )
55 #endif
57 #include <string.h>
59 using namespace ::com::sun::star::util;
60 using namespace ::com::sun::star::uno;
61 using namespace ::com::sun::star::lang;
62 using namespace ::com::sun::star::i18n;
63 using namespace ::rtl;
65 static sal_Int32 COMPLEX_TRANS_MASK_TMP =
66 TransliterationModules_ignoreBaFa_ja_JP |
67 TransliterationModules_ignoreIterationMark_ja_JP |
68 TransliterationModules_ignoreTiJi_ja_JP |
69 TransliterationModules_ignoreHyuByu_ja_JP |
70 TransliterationModules_ignoreSeZe_ja_JP |
71 TransliterationModules_ignoreIandEfollowedByYa_ja_JP |
72 TransliterationModules_ignoreKiKuFollowedBySa_ja_JP |
73 TransliterationModules_ignoreProlongedSoundMark_ja_JP;
74 static const sal_Int32 SIMPLE_TRANS_MASK = 0xffffffff ^ COMPLEX_TRANS_MASK_TMP;
75 static const sal_Int32 COMPLEX_TRANS_MASK =
76 COMPLEX_TRANS_MASK_TMP |
77 TransliterationModules_IGNORE_KANA |
78 TransliterationModules_IGNORE_WIDTH;
79 // Above 2 transliteration is simple but need to take effect in
80 // complex transliteration
82 TextSearch::TextSearch(const Reference < XMultiServiceFactory > & rxMSF)
83 : xMSF( rxMSF )
84 , pJumpTable( 0 )
85 , pJumpTable2( 0 )
86 , pRegExp( 0 )
87 , pWLD( 0 )
89 SearchOptions aOpt;
90 aOpt.algorithmType = SearchAlgorithms_ABSOLUTE;
91 aOpt.searchFlag = SearchFlags::ALL_IGNORE_CASE;
92 //aOpt.Locale = ???;
93 setOptions( aOpt );
96 TextSearch::~TextSearch()
98 delete pRegExp;
99 delete pWLD;
100 delete pJumpTable;
101 delete pJumpTable2;
104 void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeException )
106 aSrchPara = rOptions;
108 delete pRegExp, pRegExp = 0;
109 delete pWLD, pWLD = 0;
110 delete pJumpTable, pJumpTable = 0;
111 delete pJumpTable2, pJumpTable2 = 0;
113 // Create Transliteration class
114 if( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK )
116 if( !xTranslit.is() )
118 Reference < XInterface > xI = xMSF->createInstance(
119 OUString::createFromAscii(
120 "com.sun.star.i18n.Transliteration"));
121 if ( xI.is() )
122 xI->queryInterface( ::getCppuType(
123 (const Reference< XExtendedTransliteration >*)0))
124 >>= xTranslit;
126 // Load transliteration module
127 if( xTranslit.is() )
128 xTranslit->loadModule(
129 (TransliterationModules)( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ),
130 aSrchPara.Locale);
132 else if( xTranslit.is() )
133 xTranslit = 0;
135 // Create Transliteration for 2<->1, 2<->2 transliteration
136 if ( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK )
138 if( !xTranslit2.is() )
140 Reference < XInterface > xI = xMSF->createInstance(
141 OUString::createFromAscii(
142 "com.sun.star.i18n.Transliteration"));
143 if ( xI.is() )
144 xI->queryInterface( ::getCppuType(
145 (const Reference< XExtendedTransliteration >*)0))
146 >>= xTranslit2;
148 // Load transliteration module
149 if( xTranslit2.is() )
150 xTranslit2->loadModule(
151 (TransliterationModules)( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ),
152 aSrchPara.Locale);
155 if ( !xBreak.is() )
157 Reference < XInterface > xI = xMSF->createInstance(
158 OUString::createFromAscii( "com.sun.star.i18n.BreakIterator"));
159 if( xI.is() )
160 xI->queryInterface( ::getCppuType(
161 (const Reference< XBreakIterator >*)0))
162 >>= xBreak;
165 sSrchStr = aSrchPara.searchString;
167 // use transliteration here, but only if not RegEx, which does it different
168 if ( aSrchPara.algorithmType != SearchAlgorithms_REGEXP && xTranslit.is() &&
169 aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK )
170 sSrchStr = xTranslit->transliterateString2String(
171 aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
173 if ( aSrchPara.algorithmType != SearchAlgorithms_REGEXP && xTranslit2.is() &&
174 aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK )
175 sSrchStr2 = xTranslit2->transliterateString2String(
176 aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
178 // When start or end of search string is a complex script type, we need to
179 // make sure the result boundary is not located in the middle of cell.
180 checkCTLStart = (xBreak.is() && (xBreak->getScriptType(sSrchStr, 0) ==
181 ScriptType::COMPLEX));
182 checkCTLEnd = (xBreak.is() && (xBreak->getScriptType(sSrchStr,
183 sSrchStr.getLength()-1) == ScriptType::COMPLEX));
185 if ( aSrchPara.algorithmType == SearchAlgorithms_REGEXP )
187 fnForward = &TextSearch::RESrchFrwrd;
188 fnBackward = &TextSearch::RESrchBkwrd;
190 pRegExp = new Regexpr( aSrchPara, xTranslit );
192 else
194 if ( aSrchPara.algorithmType == SearchAlgorithms_APPROXIMATE )
196 fnForward = &TextSearch::ApproxSrchFrwrd;
197 fnBackward = &TextSearch::ApproxSrchBkwrd;
199 pWLD = new WLevDistance( sSrchStr.getStr(), aSrchPara.changedChars,
200 aSrchPara.insertedChars, aSrchPara.deletedChars,
201 0 != (SearchFlags::LEV_RELAXED & aSrchPara.searchFlag ) );
203 nLimit = pWLD->GetLimit();
205 else
207 fnForward = &TextSearch::NSrchFrwrd;
208 fnBackward = &TextSearch::NSrchBkwrd;
213 sal_Int32 FindPosInSeq_Impl( const Sequence <sal_Int32>& rOff, sal_Int32 nPos )
215 sal_Int32 nRet = 0, nEnd = rOff.getLength();
216 while( nRet < nEnd && nPos > rOff[ nRet ] ) ++nRet;
217 return nRet;
220 sal_Bool TextSearch::isCellStart(const OUString& searchStr, sal_Int32 nPos)
221 throw( RuntimeException )
223 sal_Int32 nDone;
224 return nPos == xBreak->previousCharacters(searchStr, nPos+1,
225 aSrchPara.Locale, CharacterIteratorMode::SKIPCELL, 1, nDone);
228 SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
229 throw( RuntimeException )
231 SearchResult sres;
233 OUString in_str(searchStr);
234 sal_Int32 newStartPos = startPos;
235 sal_Int32 newEndPos = endPos;
237 bUsePrimarySrchStr = true;
239 if ( xTranslit.is() )
241 // apply normal transliteration (1<->1, 1<->0)
242 com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
243 in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset );
245 // JP 20.6.2001: also the start and end positions must be corrected!
246 if( startPos )
247 newStartPos = FindPosInSeq_Impl( offset, startPos );
249 if( endPos < searchStr.getLength() )
250 newEndPos = FindPosInSeq_Impl( offset, endPos );
251 else
252 newEndPos = in_str.getLength();
254 sres = (this->*fnForward)( in_str, newStartPos, newEndPos );
256 for ( int k = 0; k < sres.startOffset.getLength(); k++ )
258 if (sres.startOffset[k])
259 sres.startOffset[k] = offset[sres.startOffset[k]];
260 // JP 20.6.2001: end is ever exclusive and then don't return
261 // the position of the next character - return the
262 // next position behind the last found character!
263 // "a b c" find "b" must return 2,3 and not 2,4!!!
264 if (sres.endOffset[k])
265 sres.endOffset[k] = offset[sres.endOffset[k]-1] + 1;
268 else
270 sres = (this->*fnForward)( in_str, startPos, endPos );
273 if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP)
275 SearchResult sres2;
277 in_str = OUString(searchStr);
278 com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
280 in_str = xTranslit2->transliterate( searchStr, 0, in_str.getLength(), offset );
282 if( startPos )
283 startPos = FindPosInSeq_Impl( offset, startPos );
285 if( endPos < searchStr.getLength() )
286 endPos = FindPosInSeq_Impl( offset, endPos );
287 else
288 endPos = in_str.getLength();
290 bUsePrimarySrchStr = false;
291 sres2 = (this->*fnForward)( in_str, startPos, endPos );
293 for ( int k = 0; k < sres2.startOffset.getLength(); k++ )
295 if (sres2.startOffset[k])
296 sres2.startOffset[k] = offset[sres2.startOffset[k]-1] + 1;
297 if (sres2.endOffset[k])
298 sres2.endOffset[k] = offset[sres2.endOffset[k]-1] + 1;
301 // pick first and long one
302 if ( sres.subRegExpressions == 0)
303 return sres2;
304 if ( sres2.subRegExpressions == 1)
306 if ( sres.startOffset[0] > sres2.startOffset[0])
307 return sres2;
308 else if ( sres.startOffset[0] == sres2.startOffset[0] &&
309 sres.endOffset[0] < sres2.endOffset[0])
310 return sres2;
314 return sres;
317 SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
318 throw(RuntimeException)
320 SearchResult sres;
322 OUString in_str(searchStr);
323 sal_Int32 newStartPos = startPos;
324 sal_Int32 newEndPos = endPos;
326 bUsePrimarySrchStr = true;
328 if ( xTranslit.is() )
330 // apply only simple 1<->1 transliteration here
331 com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
332 in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset );
334 // JP 20.6.2001: also the start and end positions must be corrected!
335 if( startPos < searchStr.getLength() )
336 newStartPos = FindPosInSeq_Impl( offset, startPos );
337 else
338 newStartPos = in_str.getLength();
340 if( endPos )
341 newEndPos = FindPosInSeq_Impl( offset, endPos );
343 sres = (this->*fnBackward)( in_str, newStartPos, newEndPos );
345 for ( int k = 0; k < sres.startOffset.getLength(); k++ )
347 if (sres.startOffset[k])
348 sres.startOffset[k] = offset[sres.startOffset[k] - 1] + 1;
349 // JP 20.6.2001: end is ever exclusive and then don't return
350 // the position of the next character - return the
351 // next position behind the last found character!
352 // "a b c" find "b" must return 2,3 and not 2,4!!!
353 if (sres.endOffset[k])
354 sres.endOffset[k] = offset[sres.endOffset[k]];
357 else
359 sres = (this->*fnBackward)( in_str, startPos, endPos );
362 if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP )
364 SearchResult sres2;
366 in_str = OUString(searchStr);
367 com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
369 in_str = xTranslit2->transliterate(searchStr, 0, in_str.getLength(), offset);
371 if( startPos < searchStr.getLength() )
372 startPos = FindPosInSeq_Impl( offset, startPos );
373 else
374 startPos = in_str.getLength();
376 if( endPos )
377 endPos = FindPosInSeq_Impl( offset, endPos );
379 bUsePrimarySrchStr = false;
380 sres2 = (this->*fnBackward)( in_str, startPos, endPos );
382 for( int k = 0; k < sres2.startOffset.getLength(); k++ )
384 if (sres2.startOffset[k])
385 sres2.startOffset[k] = offset[sres2.startOffset[k]-1]+1;
386 if (sres2.endOffset[k])
387 sres2.endOffset[k] = offset[sres2.endOffset[k]-1]+1;
390 // pick last and long one
391 if ( sres.subRegExpressions == 0 )
392 return sres2;
393 if ( sres2.subRegExpressions == 1 )
395 if ( sres.startOffset[0] < sres2.startOffset[0] )
396 return sres2;
397 if ( sres.startOffset[0] == sres2.startOffset[0] &&
398 sres.endOffset[0] > sres2.endOffset[0] )
399 return sres2;
403 return sres;
408 //--------------- die Wort-Trennner ----------------------------------
410 bool TextSearch::IsDelimiter( const OUString& rStr, sal_Int32 nPos ) const
412 bool bRet = 1;
413 if( '\x7f' != rStr[nPos])
415 if ( !xCharClass.is() )
417 Reference < XInterface > xI = xMSF->createInstance(
418 OUString::createFromAscii( "com.sun.star.i18n.CharacterClassification"));
419 if( xI.is() )
420 xI->queryInterface( ::getCppuType(
421 (const Reference< XCharacterClassification >*)0))
422 >>= xCharClass;
424 if ( xCharClass.is() )
426 sal_Int32 nCType = xCharClass->getCharacterType( rStr, nPos,
427 aSrchPara.Locale );
428 if( 0 != (( KCharacterType::DIGIT | KCharacterType::ALPHA |
429 KCharacterType::LETTER ) & nCType ) )
430 bRet = 0;
433 return bRet;
438 // --------- methods for the kind of boyer-morre search ------------------
441 void TextSearch::MakeForwardTab()
443 // create the jumptable for the search text
444 if( pJumpTable )
446 if( bIsForwardTab )
447 return ; // the jumpTable is ok
448 delete pJumpTable;
450 bIsForwardTab = true;
452 sal_Int32 n, nLen = sSrchStr.getLength();
453 pJumpTable = new TextSearchJumpTable;
455 for( n = 0; n < nLen - 1; ++n )
457 sal_Unicode cCh = sSrchStr[n];
458 sal_Int32 nDiff = nLen - n - 1;
459 TextSearchJumpTable::value_type aEntry( cCh, nDiff );
461 ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
462 pJumpTable->insert( aEntry );
463 if ( !aPair.second )
464 (*(aPair.first)).second = nDiff;
468 void TextSearch::MakeForwardTab2()
470 // create the jumptable for the search text
471 if( pJumpTable2 )
473 if( bIsForwardTab )
474 return ; // the jumpTable is ok
475 delete pJumpTable2;
477 bIsForwardTab = true;
479 sal_Int32 n, nLen = sSrchStr2.getLength();
480 pJumpTable2 = new TextSearchJumpTable;
482 for( n = 0; n < nLen - 1; ++n )
484 sal_Unicode cCh = sSrchStr2[n];
485 sal_Int32 nDiff = nLen - n - 1;
487 TextSearchJumpTable::value_type aEntry( cCh, nDiff );
488 ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
489 pJumpTable2->insert( aEntry );
490 if ( !aPair.second )
491 (*(aPair.first)).second = nDiff;
495 void TextSearch::MakeBackwardTab()
497 // create the jumptable for the search text
498 if( pJumpTable )
500 if( !bIsForwardTab )
501 return ; // the jumpTable is ok
502 delete pJumpTable;
504 bIsForwardTab = false;
506 sal_Int32 n, nLen = sSrchStr.getLength();
507 pJumpTable = new TextSearchJumpTable;
509 for( n = nLen-1; n > 0; --n )
511 sal_Unicode cCh = sSrchStr[n];
512 TextSearchJumpTable::value_type aEntry( cCh, n );
513 ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
514 pJumpTable->insert( aEntry );
515 if ( !aPair.second )
516 (*(aPair.first)).second = n;
520 void TextSearch::MakeBackwardTab2()
522 // create the jumptable for the search text
523 if( pJumpTable2 )
525 if( !bIsForwardTab )
526 return ; // the jumpTable is ok
527 delete pJumpTable2;
529 bIsForwardTab = false;
531 sal_Int32 n, nLen = sSrchStr2.getLength();
532 pJumpTable2 = new TextSearchJumpTable;
534 for( n = nLen-1; n > 0; --n )
536 sal_Unicode cCh = sSrchStr2[n];
537 TextSearchJumpTable::value_type aEntry( cCh, n );
538 ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
539 pJumpTable2->insert( aEntry );
540 if ( !aPair.second )
541 (*(aPair.first)).second = n;
545 sal_Int32 TextSearch::GetDiff( const sal_Unicode cChr ) const
547 TextSearchJumpTable *pJump;
548 OUString sSearchKey;
550 if ( bUsePrimarySrchStr ) {
551 pJump = pJumpTable;
552 sSearchKey = sSrchStr;
553 } else {
554 pJump = pJumpTable2;
555 sSearchKey = sSrchStr2;
558 TextSearchJumpTable::const_iterator iLook = pJump->find( cChr );
559 if ( iLook == pJump->end() )
560 return sSearchKey.getLength();
561 return (*iLook).second;
565 SearchResult TextSearch::NSrchFrwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
566 throw(RuntimeException)
568 SearchResult aRet;
569 aRet.subRegExpressions = 0;
571 OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2;
573 OUString aStr( searchStr );
574 sal_Int32 nSuchIdx = aStr.getLength();
575 sal_Int32 nEnde = endPos;
576 if( !nSuchIdx || !sSearchKey.getLength() || sSearchKey.getLength() > nSuchIdx )
577 return aRet;
580 if( nEnde < sSearchKey.getLength() ) // position inside the search region ?
581 return aRet;
583 nEnde -= sSearchKey.getLength();
585 if (bUsePrimarySrchStr)
586 MakeForwardTab(); // create the jumptable
587 else
588 MakeForwardTab2();
590 for (sal_Int32 nCmpIdx = startPos; // start position for the search
591 nCmpIdx <= nEnde;
592 nCmpIdx += GetDiff( aStr[nCmpIdx + sSearchKey.getLength()-1]))
594 // if the match would be the completed cells, skip it.
595 if ( (checkCTLStart && !isCellStart( aStr, nCmpIdx )) || (checkCTLEnd
596 && !isCellStart( aStr, nCmpIdx + sSearchKey.getLength())) )
597 continue;
599 nSuchIdx = sSearchKey.getLength() - 1;
600 while( nSuchIdx >= 0 && sSearchKey[nSuchIdx] == aStr[nCmpIdx + nSuchIdx])
602 if( nSuchIdx == 0 )
604 if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag )
606 sal_Int32 nFndEnd = nCmpIdx + sSearchKey.getLength();
607 bool bAtStart = !nCmpIdx;
608 bool bAtEnd = nFndEnd == endPos;
609 bool bDelimBefore = bAtStart || IsDelimiter( aStr, nCmpIdx-1 );
610 bool bDelimBehind = IsDelimiter( aStr, nFndEnd );
611 // * 1 -> only one word in the paragraph
612 // * 2 -> at begin of paragraph
613 // * 3 -> at end of paragraph
614 // * 4 -> inside the paragraph
615 if( !( ( bAtStart && bAtEnd ) || // 1
616 ( bAtStart && bDelimBehind ) || // 2
617 ( bAtEnd && bDelimBefore ) || // 3
618 ( bDelimBefore && bDelimBehind ))) // 4
619 break;
622 aRet.subRegExpressions = 1;
623 aRet.startOffset.realloc( 1 );
624 aRet.startOffset[ 0 ] = nCmpIdx;
625 aRet.endOffset.realloc( 1 );
626 aRet.endOffset[ 0 ] = nCmpIdx + sSearchKey.getLength();
628 return aRet;
630 else
631 nSuchIdx--;
634 return aRet;
637 SearchResult TextSearch::NSrchBkwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
638 throw(RuntimeException)
640 SearchResult aRet;
641 aRet.subRegExpressions = 0;
643 OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2;
645 OUString aStr( searchStr );
646 sal_Int32 nSuchIdx = aStr.getLength();
647 sal_Int32 nEnde = endPos;
648 if( nSuchIdx == 0 || sSearchKey.getLength() == 0 || sSearchKey.getLength() > nSuchIdx)
649 return aRet;
651 if (bUsePrimarySrchStr)
652 MakeBackwardTab(); // create the jumptable
653 else
654 MakeBackwardTab2();
656 if( nEnde == nSuchIdx ) // end position for the search
657 nEnde = sSearchKey.getLength();
658 else
659 nEnde += sSearchKey.getLength();
661 sal_Int32 nCmpIdx = startPos; // start position for the search
663 while (nCmpIdx >= nEnde)
665 // if the match would be the completed cells, skip it.
666 if ( (!checkCTLStart || isCellStart( aStr, nCmpIdx -
667 sSearchKey.getLength() )) && (!checkCTLEnd ||
668 isCellStart( aStr, nCmpIdx)))
670 nSuchIdx = 0;
671 while( nSuchIdx < sSearchKey.getLength() && sSearchKey[nSuchIdx] ==
672 aStr[nCmpIdx + nSuchIdx - sSearchKey.getLength()] )
673 nSuchIdx++;
674 if( nSuchIdx >= sSearchKey.getLength() )
676 if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag )
678 sal_Int32 nFndStt = nCmpIdx - sSearchKey.getLength();
679 bool bAtStart = !nFndStt;
680 bool bAtEnd = nCmpIdx == startPos;
681 bool bDelimBehind = IsDelimiter( aStr, nCmpIdx );
682 bool bDelimBefore = bAtStart || // begin of paragraph
683 IsDelimiter( aStr, nFndStt-1 );
684 // * 1 -> only one word in the paragraph
685 // * 2 -> at begin of paragraph
686 // * 3 -> at end of paragraph
687 // * 4 -> inside the paragraph
688 if( ( bAtStart && bAtEnd ) || // 1
689 ( bAtStart && bDelimBehind ) || // 2
690 ( bAtEnd && bDelimBefore ) || // 3
691 ( bDelimBefore && bDelimBehind )) // 4
693 aRet.subRegExpressions = 1;
694 aRet.startOffset.realloc( 1 );
695 aRet.startOffset[ 0 ] = nCmpIdx;
696 aRet.endOffset.realloc( 1 );
697 aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength();
698 return aRet;
701 else
703 aRet.subRegExpressions = 1;
704 aRet.startOffset.realloc( 1 );
705 aRet.startOffset[ 0 ] = nCmpIdx;
706 aRet.endOffset.realloc( 1 );
707 aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength();
708 return aRet;
712 nSuchIdx = GetDiff( aStr[nCmpIdx - sSearchKey.getLength()] );
713 if( nCmpIdx < nSuchIdx )
714 return aRet;
715 nCmpIdx -= nSuchIdx;
717 return aRet;
722 //---------------------------------------------------------------------------
723 // ------- Methoden fuer die Suche ueber Regular-Expressions --------------
725 SearchResult TextSearch::RESrchFrwrd( const OUString& searchStr,
726 sal_Int32 startPos, sal_Int32 endPos )
727 throw(RuntimeException)
729 SearchResult aRet;
730 aRet.subRegExpressions = 0;
731 OUString aStr( searchStr );
733 bool bSearchInSel = (0 != (( SearchFlags::REG_NOT_BEGINOFLINE |
734 SearchFlags::REG_NOT_ENDOFLINE ) & aSrchPara.searchFlag ));
736 pRegExp->set_line(aStr.getStr(), bSearchInSel ? endPos : aStr.getLength());
738 struct re_registers regs;
740 // Clear structure
741 memset((void *)&regs, 0, sizeof(struct re_registers));
742 if ( ! pRegExp->re_search(&regs, startPos) )
744 if( regs.num_of_match > 0 &&
745 (regs.start[0] != -1 && regs.end[0] != -1) )
747 aRet.startOffset.realloc(regs.num_of_match);
748 aRet.endOffset.realloc(regs.num_of_match);
750 sal_Int32 i = 0, j = 0;
751 while( j < regs.num_of_match )
753 if( regs.start[j] != -1 && regs.end[j] != -1 )
755 aRet.startOffset[i] = regs.start[j];
756 aRet.endOffset[i] = regs.end[j];
757 ++i;
759 ++j;
761 aRet.subRegExpressions = i;
763 if ( regs.num_regs > 0 )
765 if ( regs.start )
766 free(regs.start);
767 if ( regs.end )
768 free(regs.end);
772 return aRet;
776 * Sucht das Muster aSrchPara.sSrchStr rueckwaerts im String rStr
778 SearchResult TextSearch::RESrchBkwrd( const OUString& searchStr,
779 sal_Int32 startPos, sal_Int32 endPos )
780 throw(RuntimeException)
782 SearchResult aRet;
783 aRet.subRegExpressions = 0;
784 OUString aStr( searchStr );
786 sal_Int32 nOffset = 0;
787 sal_Int32 nStrEnde = aStr.getLength() == endPos ? 0 : endPos;
789 bool bSearchInSel = (0 != (( SearchFlags::REG_NOT_BEGINOFLINE |
790 SearchFlags::REG_NOT_ENDOFLINE ) & aSrchPara.searchFlag ));
792 if( startPos )
793 nOffset = startPos - 1;
795 // search only in the subString
796 if( bSearchInSel && nStrEnde )
798 aStr = aStr.copy( nStrEnde, aStr.getLength() - nStrEnde );
799 if( nOffset > nStrEnde )
800 nOffset = nOffset - nStrEnde;
801 else
802 nOffset = 0;
805 // set the length to negative for reverse search
806 pRegExp->set_line( aStr.getStr(), -(aStr.getLength()) );
807 struct re_registers regs;
809 // Clear structure
810 memset((void *)&regs, 0, sizeof(struct re_registers));
811 if ( ! pRegExp->re_search(&regs, nOffset) )
813 if( regs.num_of_match > 0 &&
814 (regs.start[0] != -1 && regs.end[0] != -1) )
816 nOffset = bSearchInSel ? nStrEnde : 0;
817 aRet.startOffset.realloc(regs.num_of_match);
818 aRet.endOffset.realloc(regs.num_of_match);
820 sal_Int32 i = 0, j = 0;
821 while( j < regs.num_of_match )
823 if( regs.start[j] != -1 && regs.end[j] != -1 )
825 aRet.startOffset[i] = regs.end[j] + nOffset;
826 aRet.endOffset[i] = regs.start[j] + nOffset;
827 ++i;
829 ++j;
831 aRet.subRegExpressions = i;
833 if ( regs.num_regs > 0 )
835 if ( regs.start )
836 free(regs.start);
837 if ( regs.end )
838 free(regs.end);
842 return aRet;
845 // Phonetische Suche von Worten
846 SearchResult TextSearch::ApproxSrchFrwrd( const OUString& searchStr,
847 sal_Int32 startPos, sal_Int32 endPos )
848 throw(RuntimeException)
850 SearchResult aRet;
851 aRet.subRegExpressions = 0;
853 if( !xBreak.is() )
854 return aRet;
856 OUString aWTemp( searchStr );
858 register sal_Int32 nStt, nEnd;
860 Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos,
861 aSrchPara.Locale,
862 WordType::ANYWORD_IGNOREWHITESPACES, sal_True );
866 if( aWBnd.startPos >= endPos )
867 break;
868 nStt = aWBnd.startPos < startPos ? startPos : aWBnd.startPos;
869 nEnd = aWBnd.endPos > endPos ? endPos : aWBnd.endPos;
871 if( nStt < nEnd &&
872 pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit )
874 aRet.subRegExpressions = 1;
875 aRet.startOffset.realloc( 1 );
876 aRet.startOffset[ 0 ] = nStt;
877 aRet.endOffset.realloc( 1 );
878 aRet.endOffset[ 0 ] = nEnd;
879 break;
882 nStt = nEnd - 1;
883 aWBnd = xBreak->nextWord( aWTemp, nStt, aSrchPara.Locale,
884 WordType::ANYWORD_IGNOREWHITESPACES);
885 } while( aWBnd.startPos != aWBnd.endPos ||
886 (aWBnd.endPos != aWTemp.getLength() && aWBnd.endPos != nEnd) );
887 // #i50244# aWBnd.endPos != nEnd : in case there is _no_ word (only
888 // whitespace) in searchStr, getWordBoundary() returned startPos,startPos
889 // and nextWord() does also => don't loop forever.
890 return aRet;
893 SearchResult TextSearch::ApproxSrchBkwrd( const OUString& searchStr,
894 sal_Int32 startPos, sal_Int32 endPos )
895 throw(RuntimeException)
897 SearchResult aRet;
898 aRet.subRegExpressions = 0;
900 if( !xBreak.is() )
901 return aRet;
903 OUString aWTemp( searchStr );
905 register sal_Int32 nStt, nEnd;
907 Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos,
908 aSrchPara.Locale,
909 WordType::ANYWORD_IGNOREWHITESPACES, sal_True );
913 if( aWBnd.endPos <= endPos )
914 break;
915 nStt = aWBnd.startPos < endPos ? endPos : aWBnd.startPos;
916 nEnd = aWBnd.endPos > startPos ? startPos : aWBnd.endPos;
918 if( nStt < nEnd &&
919 pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit )
921 aRet.subRegExpressions = 1;
922 aRet.startOffset.realloc( 1 );
923 aRet.startOffset[ 0 ] = nEnd;
924 aRet.endOffset.realloc( 1 );
925 aRet.endOffset[ 0 ] = nStt;
926 break;
928 if( !nStt )
929 break;
931 aWBnd = xBreak->previousWord( aWTemp, nStt, aSrchPara.Locale,
932 WordType::ANYWORD_IGNOREWHITESPACES);
933 } while( aWBnd.startPos != aWBnd.endPos || aWBnd.endPos != aWTemp.getLength() );
934 return aRet;
938 static const sal_Char cSearchName[] = "com.sun.star.util.TextSearch";
939 static const sal_Char cSearchImpl[] = "com.sun.star.util.TextSearch_i18n";
941 static OUString getServiceName_Static()
943 return OUString::createFromAscii( cSearchName );
946 static OUString getImplementationName_Static()
948 return OUString::createFromAscii( cSearchImpl );
951 OUString SAL_CALL
952 TextSearch::getImplementationName()
953 throw( RuntimeException )
955 return getImplementationName_Static();
958 sal_Bool SAL_CALL
959 TextSearch::supportsService(const OUString& rServiceName)
960 throw( RuntimeException )
962 return !rServiceName.compareToAscii( cSearchName );
965 Sequence< OUString > SAL_CALL
966 TextSearch::getSupportedServiceNames(void) throw( RuntimeException )
968 Sequence< OUString > aRet(1);
969 aRet[0] = getServiceName_Static();
970 return aRet;
973 ::com::sun::star::uno::Reference< ::com::sun::star::uno::XInterface >
974 SAL_CALL TextSearch_CreateInstance(
975 const ::com::sun::star::uno::Reference<
976 ::com::sun::star::lang::XMultiServiceFactory >& rxMSF )
978 return ::com::sun::star::uno::Reference<
979 ::com::sun::star::uno::XInterface >(
980 (::cppu::OWeakObject*) new TextSearch( rxMSF ) );
983 extern "C"
986 void SAL_CALL component_getImplementationEnvironment(
987 const sal_Char** ppEnvTypeName, uno_Environment** /*ppEnv*/ )
989 *ppEnvTypeName = CPPU_CURRENT_LANGUAGE_BINDING_NAME;
992 sal_Bool SAL_CALL component_writeInfo(
993 void* /*_pServiceManager*/, void* _pRegistryKey )
995 if (_pRegistryKey)
997 ::com::sun::star::registry::XRegistryKey * pRegistryKey =
998 reinterpret_cast< ::com::sun::star::registry::XRegistryKey* >(
999 _pRegistryKey );
1000 ::com::sun::star::uno::Reference<
1001 ::com::sun::star::registry::XRegistryKey > xNewKey;
1003 xNewKey = pRegistryKey->createKey( getImplementationName_Static() );
1004 xNewKey = xNewKey->createKey(
1005 ::rtl::OUString::createFromAscii( "/UNO/SERVICES" ) );
1006 xNewKey->createKey( getServiceName_Static() );
1008 return sal_True;
1011 void* SAL_CALL component_getFactory( const sal_Char* sImplementationName,
1012 void* _pServiceManager, void* /*_pRegistryKey*/ )
1014 void* pRet = NULL;
1016 ::com::sun::star::lang::XMultiServiceFactory* pServiceManager =
1017 reinterpret_cast< ::com::sun::star::lang::XMultiServiceFactory* >
1018 ( _pServiceManager );
1019 ::com::sun::star::uno::Reference<
1020 ::com::sun::star::lang::XSingleServiceFactory > xFactory;
1022 if ( 0 == rtl_str_compare( sImplementationName, cSearchImpl) )
1024 ::com::sun::star::uno::Sequence< ::rtl::OUString > aServiceNames(1);
1025 aServiceNames[0] = getServiceName_Static();
1026 xFactory = ::cppu::createSingleFactory(
1027 pServiceManager, getImplementationName_Static(),
1028 &TextSearch_CreateInstance, aServiceNames );
1031 if ( xFactory.is() )
1033 xFactory->acquire();
1034 pRet = xFactory.get();
1037 return pRet;
1040 } // extern "C"