1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: textsearch.cxx,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_i18npool.hxx"
34 #include "textsearch.hxx"
36 #include <regexp/reclass.hxx>
37 #include <com/sun/star/lang/Locale.hpp>
38 #include <com/sun/star/lang/XMultiServiceFactory.hpp>
39 #include <comphelper/processfactory.hxx>
40 #include <com/sun/star/i18n/UnicodeType.hpp>
41 #include <com/sun/star/util/SearchFlags.hpp>
42 #include <com/sun/star/i18n/WordType.hpp>
43 #include <com/sun/star/i18n/ScriptType.hpp>
44 #include <com/sun/star/i18n/CharacterIteratorMode.hpp>
45 #include <com/sun/star/i18n/KCharacterType.hpp>
46 #include <com/sun/star/registry/XRegistryKey.hpp>
47 #include <cppuhelper/factory.hxx>
48 #include <cppuhelper/weak.hxx>
51 // get rid of that dumb compiler warning
52 // identifier was truncated to '255' characters in the debug information
53 // for STL template usage, if .pdb files are to be created
54 #pragma warning( disable: 4786 )
59 using namespace ::com::sun::star::util
;
60 using namespace ::com::sun::star::uno
;
61 using namespace ::com::sun::star::lang
;
62 using namespace ::com::sun::star::i18n
;
63 using namespace ::rtl
;
// Japanese "ignore..." transliteration modules. These bits are removed from
// SIMPLE_TRANS_MASK and included in COMPLEX_TRANS_MASK below.
65 static sal_Int32 COMPLEX_TRANS_MASK_TMP
=
66 TransliterationModules_ignoreBaFa_ja_JP
|
67 TransliterationModules_ignoreIterationMark_ja_JP
|
68 TransliterationModules_ignoreTiJi_ja_JP
|
69 TransliterationModules_ignoreHyuByu_ja_JP
|
70 TransliterationModules_ignoreSeZe_ja_JP
|
71 TransliterationModules_ignoreIandEfollowedByYa_ja_JP
|
72 TransliterationModules_ignoreKiKuFollowedBySa_ja_JP
|
73 TransliterationModules_ignoreProlongedSoundMark_ja_JP
;
// Every bit that is not set in COMPLEX_TRANS_MASK_TMP.
74 static const sal_Int32 SIMPLE_TRANS_MASK
= 0xffffffff ^ COMPLEX_TRANS_MASK_TMP
;
// COMPLEX_TRANS_MASK_TMP plus IGNORE_KANA and IGNORE_WIDTH; those two bits are
// therefore part of both masks.
75 static const sal_Int32 COMPLEX_TRANS_MASK
=
76 COMPLEX_TRANS_MASK_TMP
|
77 TransliterationModules_IGNORE_KANA
|
78 TransliterationModules_IGNORE_WIDTH
;
79 // The two modules above (IGNORE_KANA, IGNORE_WIDTH) are simple transliterations,
80 // but they also need to take effect in the complex transliteration.
// Constructor taking the service factory reference rxMSF.
// Fills aOpt with the default search settings: the absolute (plain text)
// algorithm and the ALL_IGNORE_CASE search flag.
// NOTE(review): the member-initializer list and the use of aOpt afterwards are
// not visible here - presumably aOpt is handed to setOptions(); confirm.
82 TextSearch::TextSearch(const Reference
< XMultiServiceFactory
> & rxMSF
)
90 aOpt
.algorithmType
= SearchAlgorithms_ABSOLUTE
;
91 aOpt
.searchFlag
= SearchFlags::ALL_IGNORE_CASE
;
96 TextSearch::~TextSearch()
// Installs a new set of search options.
//
// Steps visible in this function:
//  - copy rOptions into aSrchPara and delete the regexp object, the
//    Levenshtein object and both jump tables left from earlier options;
//  - create/load xTranslit for the SIMPLE_TRANS_MASK bits and xTranslit2 for
//    the COMPLEX_TRANS_MASK bits of transliterateFlags;
//  - create the break iterator;
//  - derive sSrchStr / sSrchStr2 from the search string (transliterated unless
//    the algorithm is REGEXP);
//  - compute checkCTLStart / checkCTLEnd from the script type at both ends of
//    sSrchStr;
//  - select fnForward / fnBackward according to the algorithm type.
//
// @param rOptions  the options to use for subsequent searches
// @throws RuntimeException (per the exception specification)
104 void TextSearch::setOptions( const SearchOptions
& rOptions
) throw( RuntimeException
)
106 aSrchPara
= rOptions
;
// Drop everything that was built for the previous options.
108 delete pRegExp
, pRegExp
= 0;
109 delete pWLD
, pWLD
= 0;
110 delete pJumpTable
, pJumpTable
= 0;
111 delete pJumpTable2
, pJumpTable2
= 0;
113 // Create Transliteration class
114 if( aSrchPara
.transliterateFlags
& SIMPLE_TRANS_MASK
)
// Instantiate the transliteration service only if xTranslit is still empty.
116 if( !xTranslit
.is() )
118 Reference
< XInterface
> xI
= xMSF
->createInstance(
119 OUString::createFromAscii(
120 "com.sun.star.i18n.Transliteration"));
122 xI
->queryInterface( ::getCppuType(
123 (const Reference
< XExtendedTransliteration
>*)0))
126 // Load transliteration module
128 xTranslit
->loadModule(
129 (TransliterationModules
)( aSrchPara
.transliterateFlags
& SIMPLE_TRANS_MASK
),
// No simple transliteration requested but an xTranslit object still exists.
// NOTE(review): the statement of this branch is not visible here.
132 else if( xTranslit
.is() )
135 // Create Transliteration for 2<->1, 2<->2 transliteration
136 if ( aSrchPara
.transliterateFlags
& COMPLEX_TRANS_MASK
)
138 if( !xTranslit2
.is() )
140 Reference
< XInterface
> xI
= xMSF
->createInstance(
141 OUString::createFromAscii(
142 "com.sun.star.i18n.Transliteration"));
144 xI
->queryInterface( ::getCppuType(
145 (const Reference
< XExtendedTransliteration
>*)0))
148 // Load transliteration module
149 if( xTranslit2
.is() )
150 xTranslit2
->loadModule(
151 (TransliterationModules
)( aSrchPara
.transliterateFlags
& COMPLEX_TRANS_MASK
),
// Create the break iterator service (queried for XBreakIterator).
157 Reference
< XInterface
> xI
= xMSF
->createInstance(
158 OUString::createFromAscii( "com.sun.star.i18n.BreakIterator"));
160 xI
->queryInterface( ::getCppuType(
161 (const Reference
< XBreakIterator
>*)0))
// Start from the untransliterated search string.
165 sSrchStr
= aSrchPara
.searchString
;
167 // use transliteration here, but only if not RegEx, which handles it differently
168 if ( aSrchPara
.algorithmType
!= SearchAlgorithms_REGEXP
&& xTranslit
.is() &&
169 aSrchPara
.transliterateFlags
& SIMPLE_TRANS_MASK
)
170 sSrchStr
= xTranslit
->transliterateString2String(
171 aSrchPara
.searchString
, 0, aSrchPara
.searchString
.getLength());
// Second search key, produced by the complex transliteration.
173 if ( aSrchPara
.algorithmType
!= SearchAlgorithms_REGEXP
&& xTranslit2
.is() &&
174 aSrchPara
.transliterateFlags
& COMPLEX_TRANS_MASK
)
175 sSrchStr2
= xTranslit2
->transliterateString2String(
176 aSrchPara
.searchString
, 0, aSrchPara
.searchString
.getLength());
178 // When start or end of search string is a complex script type, we need to
179 // make sure the result boundary is not located in the middle of cell.
180 checkCTLStart
= (xBreak
.is() && (xBreak
->getScriptType(sSrchStr
, 0) ==
181 ScriptType::COMPLEX
));
// NOTE(review): for an empty sSrchStr the position passed here is -1.
182 checkCTLEnd
= (xBreak
.is() && (xBreak
->getScriptType(sSrchStr
,
183 sSrchStr
.getLength()-1) == ScriptType::COMPLEX
));
// Regular expression search: the Regexpr object receives the options and xTranslit.
185 if ( aSrchPara
.algorithmType
== SearchAlgorithms_REGEXP
)
187 fnForward
= &TextSearch::RESrchFrwrd
;
188 fnBackward
= &TextSearch::RESrchBkwrd
;
190 pRegExp
= new Regexpr( aSrchPara
, xTranslit
);
// Approximate search: weighted Levenshtein distance configured from the
// changed/inserted/deleted character counts and the LEV_RELAXED flag.
194 if ( aSrchPara
.algorithmType
== SearchAlgorithms_APPROXIMATE
)
196 fnForward
= &TextSearch::ApproxSrchFrwrd
;
197 fnBackward
= &TextSearch::ApproxSrchBkwrd
;
199 pWLD
= new WLevDistance( sSrchStr
.getStr(), aSrchPara
.changedChars
,
200 aSrchPara
.insertedChars
, aSrchPara
.deletedChars
,
201 0 != (SearchFlags::LEV_RELAXED
& aSrchPara
.searchFlag
) );
203 nLimit
= pWLD
->GetLimit();
// Plain text search functions.
// NOTE(review): presumably the fallback branch for all other algorithm types;
// the enclosing else is not visible here.
207 fnForward
= &TextSearch::NSrchFrwrd
;
208 fnBackward
= &TextSearch::NSrchBkwrd
;
// Scans rOff from the front and advances nRet to the first index whose entry
// is not smaller than nPos, or to rOff.getLength() if no such entry exists.
// NOTE(review): the return statement is not visible here; nRet is presumably
// the result.
213 sal_Int32
FindPosInSeq_Impl( const Sequence
<sal_Int32
>& rOff
, sal_Int32 nPos
)
215 sal_Int32 nRet
= 0, nEnd
= rOff
.getLength();
216 while( nRet
< nEnd
&& nPos
> rOff
[ nRet
] ) ++nRet
;
// Tells whether nPos is where a cell starts in searchStr: steps back one cell
// (CharacterIteratorMode::SKIPCELL, count 1) from nPos+1 using the break
// iterator and the search locale, and compares the resulting position to nPos.
// NOTE(review): xBreak is dereferenced without an is() check in the visible code.
220 sal_Bool
TextSearch::isCellStart(const OUString
& searchStr
, sal_Int32 nPos
)
221 throw( RuntimeException
)
224 return nPos
== xBreak
->previousCharacters(searchStr
, nPos
+1,
225 aSrchPara
.Locale
, CharacterIteratorMode::SKIPCELL
, 1, nDone
);
// Forward search entry point.
//
// Runs the selected forward search function (fnForward) on searchStr between
// startPos and endPos. If xTranslit is set, the text is transliterated first
// and the positions are converted through the offset sequence in both
// directions. If xTranslit2 is set and the algorithm is not REGEXP, a second
// search is done on the text transliterated with xTranslit2
// (bUsePrimarySrchStr == false) and the two results are compared.
//
// @param searchStr  text to search in
// @param startPos   start of the search range
// @param endPos     end of the search range
// @throws RuntimeException (per the exception specification)
228 SearchResult
TextSearch::searchForward( const OUString
& searchStr
, sal_Int32 startPos
, sal_Int32 endPos
)
229 throw( RuntimeException
)
233 OUString
in_str(searchStr
);
234 sal_Int32 newStartPos
= startPos
;
235 sal_Int32 newEndPos
= endPos
;
// The first pass uses the primary search key.
237 bUsePrimarySrchStr
= true;
239 if ( xTranslit
.is() )
241 // apply normal transliteration (1<->1, 1<->0)
242 com::sun::star::uno::Sequence
<sal_Int32
> offset( in_str
.getLength());
243 in_str
= xTranslit
->transliterate( searchStr
, 0, in_str
.getLength(), offset
);
245 // JP 20.6.2001: also the start and end positions must be corrected!
247 newStartPos
= FindPosInSeq_Impl( offset
, startPos
);
249 if( endPos
< searchStr
.getLength() )
250 newEndPos
= FindPosInSeq_Impl( offset
, endPos
);
// NOTE(review): presumably the else branch of the test above (endPos at or
// beyond the end of searchStr); the else itself is not visible here.
252 newEndPos
= in_str
.getLength();
254 sres
= (this->*fnForward
)( in_str
, newStartPos
, newEndPos
);
// Map the positions found in the transliterated text back through offset.
256 for ( int k
= 0; k
< sres
.startOffset
.getLength(); k
++ )
// A start offset of 0 is left as it is.
258 if (sres
.startOffset
[k
])
259 sres
.startOffset
[k
] = offset
[sres
.startOffset
[k
]];
260 // JP 20.6.2001: the end is always exclusive, so do not return
261 // the position of the next character - return the
262 // next position behind the last found character!
263 // "a b c" find "b" must return 2,3 and not 2,4!!!
264 if (sres
.endOffset
[k
])
265 sres
.endOffset
[k
] = offset
[sres
.endOffset
[k
]-1] + 1;
// Search on the unchanged text and positions.
// NOTE(review): presumably the branch taken when xTranslit is not set.
270 sres
= (this->*fnForward
)( in_str
, startPos
, endPos
);
// Second pass with the complex transliteration; skipped for REGEXP.
273 if ( xTranslit2
.is() && aSrchPara
.algorithmType
!= SearchAlgorithms_REGEXP
)
277 in_str
= OUString(searchStr
);
278 com::sun::star::uno::Sequence
<sal_Int32
> offset( in_str
.getLength());
280 in_str
= xTranslit2
->transliterate( searchStr
, 0, in_str
.getLength(), offset
);
// From here on startPos / endPos are overwritten with positions in the
// transliterated text.
283 startPos
= FindPosInSeq_Impl( offset
, startPos
);
285 if( endPos
< searchStr
.getLength() )
286 endPos
= FindPosInSeq_Impl( offset
, endPos
);
288 endPos
= in_str
.getLength();
// Makes the plain search functions use sSrchStr2.
290 bUsePrimarySrchStr
= false;
291 sres2
= (this->*fnForward
)( in_str
, startPos
, endPos
);
// NOTE(review): the start offset is mapped as offset[pos-1]+1 here, but as
// offset[pos] in the first pass above - confirm this difference is intended.
293 for ( int k
= 0; k
< sres2
.startOffset
.getLength(); k
++ )
295 if (sres2
.startOffset
[k
])
296 sres2
.startOffset
[k
] = offset
[sres2
.startOffset
[k
]-1] + 1;
297 if (sres2
.endOffset
[k
])
298 sres2
.endOffset
[k
] = offset
[sres2
.endOffset
[k
]-1] + 1;
301 // pick first and long one
// NOTE(review): only the comparisons are visible below, not the statements
// they guard.
302 if ( sres
.subRegExpressions
== 0)
304 if ( sres2
.subRegExpressions
== 1)
306 if ( sres
.startOffset
[0] > sres2
.startOffset
[0])
308 else if ( sres
.startOffset
[0] == sres2
.startOffset
[0] &&
309 sres
.endOffset
[0] < sres2
.endOffset
[0])
// Backward search entry point; counterpart of searchForward.
//
// Runs the selected backward search function (fnBackward) on searchStr. With
// xTranslit set, the text is transliterated and the positions are converted
// through the offset sequence in both directions. With xTranslit2 set and a
// non-REGEXP algorithm, a second search is done on the text transliterated
// with xTranslit2 (bUsePrimarySrchStr == false) and the results are compared.
//
// @param searchStr  text to search in
// @param startPos   start position; compared against the text length below, so
//                   for a backward search it is the higher position
// @param endPos     end position of the search range
// @throws RuntimeException (per the exception specification)
317 SearchResult
TextSearch::searchBackward( const OUString
& searchStr
, sal_Int32 startPos
, sal_Int32 endPos
)
318 throw(RuntimeException
)
322 OUString
in_str(searchStr
);
323 sal_Int32 newStartPos
= startPos
;
324 sal_Int32 newEndPos
= endPos
;
// The first pass uses the primary search key.
326 bUsePrimarySrchStr
= true;
328 if ( xTranslit
.is() )
330 // apply only simple 1<->1 transliteration here
331 com::sun::star::uno::Sequence
<sal_Int32
> offset( in_str
.getLength());
332 in_str
= xTranslit
->transliterate( searchStr
, 0, in_str
.getLength(), offset
);
334 // JP 20.6.2001: also the start and end positions must be corrected!
335 if( startPos
< searchStr
.getLength() )
336 newStartPos
= FindPosInSeq_Impl( offset
, startPos
);
// NOTE(review): presumably the else branch of the test above; the else itself
// is not visible here.
338 newStartPos
= in_str
.getLength();
341 newEndPos
= FindPosInSeq_Impl( offset
, endPos
);
343 sres
= (this->*fnBackward
)( in_str
, newStartPos
, newEndPos
);
// Map the positions found in the transliterated text back through offset.
345 for ( int k
= 0; k
< sres
.startOffset
.getLength(); k
++ )
347 if (sres
.startOffset
[k
])
348 sres
.startOffset
[k
] = offset
[sres
.startOffset
[k
] - 1] + 1;
349 // JP 20.6.2001: the end is always exclusive, so do not return
350 // the position of the next character - return the
351 // next position behind the last found character!
352 // "a b c" find "b" must return 2,3 and not 2,4!!!
353 if (sres
.endOffset
[k
])
354 sres
.endOffset
[k
] = offset
[sres
.endOffset
[k
]];
// Search on the unchanged text and positions.
// NOTE(review): presumably the branch taken when xTranslit is not set.
359 sres
= (this->*fnBackward
)( in_str
, startPos
, endPos
);
// Second pass with the complex transliteration; skipped for REGEXP.
362 if ( xTranslit2
.is() && aSrchPara
.algorithmType
!= SearchAlgorithms_REGEXP
)
366 in_str
= OUString(searchStr
);
367 com::sun::star::uno::Sequence
<sal_Int32
> offset( in_str
.getLength());
369 in_str
= xTranslit2
->transliterate(searchStr
, 0, in_str
.getLength(), offset
);
// From here on startPos / endPos are overwritten with positions in the
// transliterated text.
371 if( startPos
< searchStr
.getLength() )
372 startPos
= FindPosInSeq_Impl( offset
, startPos
);
374 startPos
= in_str
.getLength();
377 endPos
= FindPosInSeq_Impl( offset
, endPos
);
// Makes the plain search functions use sSrchStr2.
379 bUsePrimarySrchStr
= false;
380 sres2
= (this->*fnBackward
)( in_str
, startPos
, endPos
);
// NOTE(review): both offsets are mapped as offset[pos-1]+1 here, while the
// first pass above maps the end offset as offset[pos] - confirm this is intended.
382 for( int k
= 0; k
< sres2
.startOffset
.getLength(); k
++ )
384 if (sres2
.startOffset
[k
])
385 sres2
.startOffset
[k
] = offset
[sres2
.startOffset
[k
]-1]+1;
386 if (sres2
.endOffset
[k
])
387 sres2
.endOffset
[k
] = offset
[sres2
.endOffset
[k
]-1]+1;
390 // pick last and long one
// NOTE(review): only the comparisons are visible below, not the statements
// they guard.
391 if ( sres
.subRegExpressions
== 0 )
393 if ( sres2
.subRegExpressions
== 1 )
395 if ( sres
.startOffset
[0] < sres2
.startOffset
[0] )
397 if ( sres
.startOffset
[0] == sres2
.startOffset
[0] &&
398 sres
.endOffset
[0] > sres2
.endOffset
[0] )
408 //--------------- the word delimiters ----------------------------------
// Classifies the character at rStr[nPos] for the whole-word checks of the
// plain text search.
// The character '\x7f' is handled separately. For every other character the
// CharacterClassification service is created on first use and the character
// type is tested for the DIGIT, ALPHA and LETTER bits.
// NOTE(review): the result variable and the return statement are not visible
// here - presumably a character with any of those bits is not a delimiter;
// confirm.
410 bool TextSearch::IsDelimiter( const OUString
& rStr
, sal_Int32 nPos
) const
413 if( '\x7f' != rStr
[nPos
])
// Create the character classification service only once.
415 if ( !xCharClass
.is() )
417 Reference
< XInterface
> xI
= xMSF
->createInstance(
418 OUString::createFromAscii( "com.sun.star.i18n.CharacterClassification"));
420 xI
->queryInterface( ::getCppuType(
421 (const Reference
< XCharacterClassification
>*)0))
424 if ( xCharClass
.is() )
426 sal_Int32 nCType
= xCharClass
->getCharacterType( rStr
, nPos
,
428 if( 0 != (( KCharacterType::DIGIT
| KCharacterType::ALPHA
|
429 KCharacterType::LETTER
) & nCType
) )
438 // --------- methods for the Boyer-Moore style search ------------------
// Builds the forward jump table pJumpTable for the primary key sSrchStr.
// Every character except the last one is entered with its distance to the end
// of the key (nLen - n - 1); a character that occurs more than once ends up
// with the value of its last occurrence.
// NOTE(review): the test that leads to the early return below is not visible
// here - presumably an existing table that is already a forward table is reused.
441 void TextSearch::MakeForwardTab()
443 // create the jumptable for the search text
447 return ; // the jumpTable is ok
450 bIsForwardTab
= true;
452 sal_Int32 n
, nLen
= sSrchStr
.getLength();
453 pJumpTable
= new TextSearchJumpTable
;
// The last character of the key is deliberately left out (n < nLen - 1).
455 for( n
= 0; n
< nLen
- 1; ++n
)
457 sal_Unicode cCh
= sSrchStr
[n
];
458 sal_Int32 nDiff
= nLen
- n
- 1;
459 TextSearchJumpTable::value_type
aEntry( cCh
, nDiff
);
461 ::std::pair
< TextSearchJumpTable::iterator
, bool > aPair
=
462 pJumpTable
->insert( aEntry
);
// Store nDiff in the entry the insert call returned.
464 (*(aPair
.first
)).second
= nDiff
;
// Builds the forward jump table pJumpTable2 for the secondary key sSrchStr2.
// Every character except the last one is entered with its distance to the end
// of the key (nLen - n - 1); a character that occurs more than once ends up
// with the value of its last occurrence.
// NOTE(review): this function writes the same bIsForwardTab flag as the
// functions that build pJumpTable - verify that one flag can describe the
// direction of both tables.
468 void TextSearch::MakeForwardTab2()
470 // create the jumptable for the search text
474 return ; // the jumpTable is ok
477 bIsForwardTab
= true;
479 sal_Int32 n
, nLen
= sSrchStr2
.getLength();
480 pJumpTable2
= new TextSearchJumpTable
;
// The last character of the key is deliberately left out (n < nLen - 1).
482 for( n
= 0; n
< nLen
- 1; ++n
)
484 sal_Unicode cCh
= sSrchStr2
[n
];
485 sal_Int32 nDiff
= nLen
- n
- 1;
487 TextSearchJumpTable::value_type
aEntry( cCh
, nDiff
);
488 ::std::pair
< TextSearchJumpTable::iterator
, bool > aPair
=
489 pJumpTable2
->insert( aEntry
);
// Store nDiff in the entry the insert call returned.
491 (*(aPair
.first
)).second
= nDiff
;
// Builds the backward jump table pJumpTable for the primary key sSrchStr.
// The key is walked from its last character down to index 1 and each character
// is entered with its index n; a character that occurs more than once ends up
// with its smallest index greater than 0.
// NOTE(review): the test that leads to the early return below is not visible
// here - presumably an existing table that is already a backward table is reused.
495 void TextSearch::MakeBackwardTab()
497 // create the jumptable for the search text
501 return ; // the jumpTable is ok
504 bIsForwardTab
= false;
506 sal_Int32 n
, nLen
= sSrchStr
.getLength();
507 pJumpTable
= new TextSearchJumpTable
;
// The first character of the key (index 0) is deliberately left out (n > 0).
509 for( n
= nLen
-1; n
> 0; --n
)
511 sal_Unicode cCh
= sSrchStr
[n
];
512 TextSearchJumpTable::value_type
aEntry( cCh
, n
);
513 ::std::pair
< TextSearchJumpTable::iterator
, bool > aPair
=
514 pJumpTable
->insert( aEntry
);
// Store n in the entry the insert call returned.
516 (*(aPair
.first
)).second
= n
;
// Builds the backward jump table pJumpTable2 for the secondary key sSrchStr2.
// The key is walked from its last character down to index 1 and each character
// is entered with its index n; a character that occurs more than once ends up
// with its smallest index greater than 0.
// NOTE(review): this function writes the same bIsForwardTab flag as the
// functions that build pJumpTable - verify that one flag can describe the
// direction of both tables.
520 void TextSearch::MakeBackwardTab2()
522 // create the jumptable for the search text
526 return ; // the jumpTable is ok
529 bIsForwardTab
= false;
531 sal_Int32 n
, nLen
= sSrchStr2
.getLength();
532 pJumpTable2
= new TextSearchJumpTable
;
// The first character of the key (index 0) is deliberately left out (n > 0).
534 for( n
= nLen
-1; n
> 0; --n
)
536 sal_Unicode cCh
= sSrchStr2
[n
];
537 TextSearchJumpTable::value_type
aEntry( cCh
, n
);
538 ::std::pair
< TextSearchJumpTable::iterator
, bool > aPair
=
539 pJumpTable2
->insert( aEntry
);
// Store n in the entry the insert call returned.
541 (*(aPair
.first
)).second
= n
;
// Returns the jump distance for the character cChr: the value stored for it in
// the active jump table, or the length of the active search key if the table
// has no entry for that character.
// bUsePrimarySrchStr selects between sSrchStr and sSrchStr2.
// NOTE(review): the assignments to pJump are not visible here - presumably
// pJumpTable / pJumpTable2 matching the selected key.
545 sal_Int32
TextSearch::GetDiff( const sal_Unicode cChr
) const
547 TextSearchJumpTable
*pJump
;
550 if ( bUsePrimarySrchStr
) {
552 sSearchKey
= sSrchStr
;
555 sSearchKey
= sSrchStr2
;
558 TextSearchJumpTable::const_iterator iLook
= pJump
->find( cChr
);
// Character does not occur in the table: jump by the whole key length.
559 if ( iLook
== pJump
->end() )
560 return sSearchKey
.getLength();
561 return (*iLook
).second
;
// Plain text forward search for the active key (sSrchStr or sSrchStr2,
// selected by bUsePrimarySrchStr) using the forward jump table.
//
// At each candidate position nCmpIdx the key is compared from its last
// character backwards; after a candidate the position advances by GetDiff() of
// the text character under the last key character. Candidates that violate
// the CTL cell checks are filtered, as are - with SearchFlags::NORM_WORD_ONLY -
// those that are not whole words. A hit is stored as one start/end pair with
// subRegExpressions == 1.
//
// @param searchStr  text to search in
// @param startPos   first candidate position
// @param endPos     end of the search range
// @throws RuntimeException (per the exception specification)
565 SearchResult
TextSearch::NSrchFrwrd( const OUString
& searchStr
, sal_Int32 startPos
, sal_Int32 endPos
)
566 throw(RuntimeException
)
569 aRet
.subRegExpressions
= 0;
571 OUString sSearchKey
= bUsePrimarySrchStr
? sSrchStr
: sSrchStr2
;
573 OUString
aStr( searchStr
);
// nSuchIdx holds the text length here; it is reused below as the index into the key.
574 sal_Int32 nSuchIdx
= aStr
.getLength();
575 sal_Int32 nEnde
= endPos
;
// Empty text, empty key, or key longer than the text.
// NOTE(review): the statement guarded by this test is not visible here.
576 if( !nSuchIdx
|| !sSearchKey
.getLength() || sSearchKey
.getLength() > nSuchIdx
)
580 if( nEnde
< sSearchKey
.getLength() ) // position inside the search region ?
// nEnde becomes endPos minus the key length.
583 nEnde
-= sSearchKey
.getLength();
585 if (bUsePrimarySrchStr
)
586 MakeForwardTab(); // create the jumptable
// NOTE(review): the loop condition (original line 591) is not visible here.
590 for (sal_Int32 nCmpIdx
= startPos
; // start position for the search
592 nCmpIdx
+= GetDiff( aStr
[nCmpIdx
+ sSearchKey
.getLength()-1]))
594 // if the match would split a complete cell, skip it.
595 if ( (checkCTLStart
&& !isCellStart( aStr
, nCmpIdx
)) || (checkCTLEnd
596 && !isCellStart( aStr
, nCmpIdx
+ sSearchKey
.getLength())) )
// Compare the key against the text starting with the key's last character.
599 nSuchIdx
= sSearchKey
.getLength() - 1;
600 while( nSuchIdx
>= 0 && sSearchKey
[nSuchIdx
] == aStr
[nCmpIdx
+ nSuchIdx
])
// Whole-word check.
604 if( SearchFlags::NORM_WORD_ONLY
& aSrchPara
.searchFlag
)
606 sal_Int32 nFndEnd
= nCmpIdx
+ sSearchKey
.getLength();
607 bool bAtStart
= !nCmpIdx
;
608 bool bAtEnd
= nFndEnd
== endPos
;
609 bool bDelimBefore
= bAtStart
|| IsDelimiter( aStr
, nCmpIdx
-1 );
610 bool bDelimBehind
= IsDelimiter( aStr
, nFndEnd
);
611 // * 1 -> only one word in the paragraph
612 // * 2 -> at begin of paragraph
613 // * 3 -> at end of paragraph
614 // * 4 -> inside the paragraph
// The condition is negated: it is true when the candidate is NOT a whole word.
615 if( !( ( bAtStart
&& bAtEnd
) || // 1
616 ( bAtStart
&& bDelimBehind
) || // 2
617 ( bAtEnd
&& bDelimBefore
) || // 3
618 ( bDelimBefore
&& bDelimBehind
))) // 4
// Record the hit: [nCmpIdx, nCmpIdx + key length).
622 aRet
.subRegExpressions
= 1;
623 aRet
.startOffset
.realloc( 1 );
624 aRet
.startOffset
[ 0 ] = nCmpIdx
;
625 aRet
.endOffset
.realloc( 1 );
626 aRet
.endOffset
[ 0 ] = nCmpIdx
+ sSearchKey
.getLength();
// Plain text backward search for the active key (sSrchStr or sSrchStr2,
// selected by bUsePrimarySrchStr) using the backward jump table.
//
// nCmpIdx is the position just behind a candidate match; the candidate covers
// [nCmpIdx - key length, nCmpIdx). The loop runs while nCmpIdx >= nEnde.
// A hit is reported with startOffset = nCmpIdx and endOffset = nCmpIdx - key
// length, i.e. start is the higher position.
//
// @param searchStr  text to search in
// @param startPos   position the backward search starts from
// @param endPos     end of the search range
// @throws RuntimeException (per the exception specification)
637 SearchResult
TextSearch::NSrchBkwrd( const OUString
& searchStr
, sal_Int32 startPos
, sal_Int32 endPos
)
638 throw(RuntimeException
)
641 aRet
.subRegExpressions
= 0;
643 OUString sSearchKey
= bUsePrimarySrchStr
? sSrchStr
: sSrchStr2
;
645 OUString
aStr( searchStr
);
// nSuchIdx holds the text length here; it is reused below as the index into the key.
646 sal_Int32 nSuchIdx
= aStr
.getLength();
647 sal_Int32 nEnde
= endPos
;
// Empty text, empty key, or key longer than the text.
// NOTE(review): the statement guarded by this test is not visible here.
648 if( nSuchIdx
== 0 || sSearchKey
.getLength() == 0 || sSearchKey
.getLength() > nSuchIdx
)
651 if (bUsePrimarySrchStr
)
652 MakeBackwardTab(); // create the jumptable
656 if( nEnde
== nSuchIdx
) // end position for the search
657 nEnde
= sSearchKey
.getLength();
// NOTE(review): presumably the else branch of the test above; the else itself
// is not visible here.
659 nEnde
+= sSearchKey
.getLength();
661 sal_Int32 nCmpIdx
= startPos
; // start position for the search
663 while (nCmpIdx
>= nEnde
)
665 // if the match would split a complete cell, skip it.
666 if ( (!checkCTLStart
|| isCellStart( aStr
, nCmpIdx
-
667 sSearchKey
.getLength() )) && (!checkCTLEnd
||
668 isCellStart( aStr
, nCmpIdx
)))
// Compare the key against the text from the key's first character onwards.
// NOTE(review): the initialisation of nSuchIdx for this loop is not visible here.
671 while( nSuchIdx
< sSearchKey
.getLength() && sSearchKey
[nSuchIdx
] ==
672 aStr
[nCmpIdx
+ nSuchIdx
- sSearchKey
.getLength()] )
// All characters of the key matched.
674 if( nSuchIdx
>= sSearchKey
.getLength() )
// Whole-word check.
676 if( SearchFlags::NORM_WORD_ONLY
& aSrchPara
.searchFlag
)
678 sal_Int32 nFndStt
= nCmpIdx
- sSearchKey
.getLength();
679 bool bAtStart
= !nFndStt
;
680 bool bAtEnd
= nCmpIdx
== startPos
;
681 bool bDelimBehind
= IsDelimiter( aStr
, nCmpIdx
);
682 bool bDelimBefore
= bAtStart
|| // begin of paragraph
683 IsDelimiter( aStr
, nFndStt
-1 );
684 // * 1 -> only one word in the paragraph
685 // * 2 -> at begin of paragraph
686 // * 3 -> at end of paragraph
687 // * 4 -> inside the paragraph
688 if( ( bAtStart
&& bAtEnd
) || // 1
689 ( bAtStart
&& bDelimBehind
) || // 2
690 ( bAtEnd
&& bDelimBefore
) || // 3
691 ( bDelimBefore
&& bDelimBehind
)) // 4
// Whole-word hit: start is the higher position, end the lower one.
693 aRet
.subRegExpressions
= 1;
694 aRet
.startOffset
.realloc( 1 );
695 aRet
.startOffset
[ 0 ] = nCmpIdx
;
696 aRet
.endOffset
.realloc( 1 );
697 aRet
.endOffset
[ 0 ] = nCmpIdx
- sSearchKey
.getLength();
// Same result assignment a second time.
// NOTE(review): presumably the branch without NORM_WORD_ONLY.
703 aRet
.subRegExpressions
= 1;
704 aRet
.startOffset
.realloc( 1 );
705 aRet
.startOffset
[ 0 ] = nCmpIdx
;
706 aRet
.endOffset
.realloc( 1 );
707 aRet
.endOffset
[ 0 ] = nCmpIdx
- sSearchKey
.getLength();
// Jump distance taken from the first text character of the current candidate.
712 nSuchIdx
= GetDiff( aStr
[nCmpIdx
- sSearchKey
.getLength()] );
// NOTE(review): the statement guarded by this test is not visible here.
713 if( nCmpIdx
< nSuchIdx
)
722 //---------------------------------------------------------------------------
723 // ------- methods for searching with regular expressions --------------
// Regular expression forward search.
//
// Hands the text to pRegExp (limited to endPos when one of the
// REG_NOT_BEGINOFLINE / REG_NOT_ENDOFLINE flags is set, otherwise the whole
// string) and searches from startPos. Every register whose start and end are
// both != -1 is copied to the result; subRegExpressions is the number of
// copied registers.
//
// @param searchStr  text to search in
// @param startPos   position the search starts at
// @param endPos     end of the search range (only used with the flags above)
// @throws RuntimeException (per the exception specification)
725 SearchResult
TextSearch::RESrchFrwrd( const OUString
& searchStr
,
726 sal_Int32 startPos
, sal_Int32 endPos
)
727 throw(RuntimeException
)
730 aRet
.subRegExpressions
= 0;
731 OUString
aStr( searchStr
);
// True if the search is restricted to a selection inside the line.
733 bool bSearchInSel
= (0 != (( SearchFlags::REG_NOT_BEGINOFLINE
|
734 SearchFlags::REG_NOT_ENDOFLINE
) & aSrchPara
.searchFlag
));
736 pRegExp
->set_line(aStr
.getStr(), bSearchInSel
? endPos
: aStr
.getLength());
738 struct re_registers regs
;
// Zero the register structure before the search.
741 memset((void *)&regs
, 0, sizeof(struct re_registers
));
// The branch below is taken when re_search() returns 0.
742 if ( ! pRegExp
->re_search(&regs
, startPos
) )
744 if( regs
.num_of_match
> 0 &&
745 (regs
.start
[0] != -1 && regs
.end
[0] != -1) )
747 aRet
.startOffset
.realloc(regs
.num_of_match
);
748 aRet
.endOffset
.realloc(regs
.num_of_match
);
// j walks over all registers, i counts the ones copied into the result.
// NOTE(review): the increments of i and j are not visible here.
750 sal_Int32 i
= 0, j
= 0;
751 while( j
< regs
.num_of_match
)
753 if( regs
.start
[j
] != -1 && regs
.end
[j
] != -1 )
755 aRet
.startOffset
[i
] = regs
.start
[j
];
756 aRet
.endOffset
[i
] = regs
.end
[j
];
761 aRet
.subRegExpressions
= i
;
// NOTE(review): the statements guarded by this test are not visible here -
// presumably the register arrays are released.
763 if ( regs
.num_regs
> 0 )
776 * Searches backwards in the string rStr for the pattern aSrchPara.sSrchStr
// Regular expression backward search.
//
// The search starts at startPos - 1. If one of the REG_NOT_BEGINOFLINE /
// REG_NOT_ENDOFLINE flags is set and nStrEnde is not 0, only the part of the
// text from nStrEnde onwards is searched and nOffset is adjusted to it. The
// text length is passed negated to set_line() to request a reverse search.
// In the result startOffset receives the register's end and endOffset the
// register's start (each plus the substring offset), so start is the higher
// position.
//
// @param searchStr  text to search in
// @param startPos   position the backward search starts from
// @param endPos     end of the search range
// @throws RuntimeException (per the exception specification)
778 SearchResult
TextSearch::RESrchBkwrd( const OUString
& searchStr
,
779 sal_Int32 startPos
, sal_Int32 endPos
)
780 throw(RuntimeException
)
783 aRet
.subRegExpressions
= 0;
784 OUString
aStr( searchStr
);
786 sal_Int32 nOffset
= 0;
// 0 when endPos equals the text length, otherwise endPos.
787 sal_Int32 nStrEnde
= aStr
.getLength() == endPos
? 0 : endPos
;
// True if the search is restricted to a selection inside the line.
789 bool bSearchInSel
= (0 != (( SearchFlags::REG_NOT_BEGINOFLINE
|
790 SearchFlags::REG_NOT_ENDOFLINE
) & aSrchPara
.searchFlag
));
// NOTE(review): a condition for this assignment, if any, is not visible here.
793 nOffset
= startPos
- 1;
795 // search only in the subString
796 if( bSearchInSel
&& nStrEnde
)
798 aStr
= aStr
.copy( nStrEnde
, aStr
.getLength() - nStrEnde
);
799 if( nOffset
> nStrEnde
)
800 nOffset
= nOffset
- nStrEnde
;
805 // set the length to negative for reverse search
806 pRegExp
->set_line( aStr
.getStr(), -(aStr
.getLength()) );
807 struct re_registers regs
;
// Zero the register structure before the search.
810 memset((void *)&regs
, 0, sizeof(struct re_registers
));
// The branch below is taken when re_search() returns 0.
811 if ( ! pRegExp
->re_search(&regs
, nOffset
) )
813 if( regs
.num_of_match
> 0 &&
814 (regs
.start
[0] != -1 && regs
.end
[0] != -1) )
// From here on nOffset is the amount added to the register positions:
// nStrEnde when a selection is searched, otherwise 0.
816 nOffset
= bSearchInSel
? nStrEnde
: 0;
817 aRet
.startOffset
.realloc(regs
.num_of_match
);
818 aRet
.endOffset
.realloc(regs
.num_of_match
);
// j walks over all registers, i counts the ones copied into the result.
// NOTE(review): the increments of i and j are not visible here.
820 sal_Int32 i
= 0, j
= 0;
821 while( j
< regs
.num_of_match
)
823 if( regs
.start
[j
] != -1 && regs
.end
[j
] != -1 )
// Backward result: start and end are swapped.
825 aRet
.startOffset
[i
] = regs
.end
[j
] + nOffset
;
826 aRet
.endOffset
[i
] = regs
.start
[j
] + nOffset
;
831 aRet
.subRegExpressions
= i
;
// NOTE(review): the statements guarded by this test are not visible here -
// presumably the register arrays are released.
833 if ( regs
.num_regs
> 0 )
845 // Phonetic search for words
// Approximate forward search, word by word.
//
// Starting with the word boundary at startPos, each word is clipped to
// [startPos, endPos] and its distance is computed with pWLD->WLD(); a word
// whose distance is <= nLimit is reported as the single hit
// (subRegExpressions == 1, start = nStt, end = nEnd). Otherwise the search
// moves on with xBreak->nextWord().
//
// @param searchStr  text to search in
// @param startPos   position the search starts at
// @param endPos     end of the search range
// @throws RuntimeException (per the exception specification)
// NOTE(review): xBreak and pWLD are dereferenced without a check in the
// visible code.
846 SearchResult
TextSearch::ApproxSrchFrwrd( const OUString
& searchStr
,
847 sal_Int32 startPos
, sal_Int32 endPos
)
848 throw(RuntimeException
)
851 aRet
.subRegExpressions
= 0;
856 OUString
aWTemp( searchStr
);
858 register sal_Int32 nStt
, nEnd
;
// Word boundary at startPos, ignoring white space.
860 Boundary aWBnd
= xBreak
->getWordBoundary( aWTemp
, startPos
,
862 WordType::ANYWORD_IGNOREWHITESPACES
, sal_True
);
// The current word starts at or behind endPos.
// NOTE(review): the statement guarded by this test is not visible here -
// presumably the loop is left.
866 if( aWBnd
.startPos
>= endPos
)
// Clip the word to the search range.
868 nStt
= aWBnd
.startPos
< startPos
? startPos
: aWBnd
.startPos
;
869 nEnd
= aWBnd
.endPos
> endPos
? endPos
: aWBnd
.endPos
;
// NOTE(review): the beginning of this condition (original line 871) is not
// visible here.
872 pWLD
->WLD( aWTemp
.getStr() + nStt
, nEnd
- nStt
) <= nLimit
)
874 aRet
.subRegExpressions
= 1;
875 aRet
.startOffset
.realloc( 1 );
876 aRet
.startOffset
[ 0 ] = nStt
;
877 aRet
.endOffset
.realloc( 1 );
878 aRet
.endOffset
[ 0 ] = nEnd
;
// Continue with the next word.
883 aWBnd
= xBreak
->nextWord( aWTemp
, nStt
, aSrchPara
.Locale
,
884 WordType::ANYWORD_IGNOREWHITESPACES
);
885 } while( aWBnd
.startPos
!= aWBnd
.endPos
||
886 (aWBnd
.endPos
!= aWTemp
.getLength() && aWBnd
.endPos
!= nEnd
) );
887 // #i50244# aWBnd.endPos != nEnd : in case there is _no_ word (only
888 // whitespace) in searchStr, getWordBoundary() returned startPos,startPos
889 // and nextWord() does also => don't loop forever.
// Approximate backward search, word by word.
//
// Starting with the word boundary at startPos, each word is clipped to
// [endPos, startPos] and its distance is computed with pWLD->WLD(); a word
// whose distance is <= nLimit is reported as the single hit with
// startOffset = nEnd and endOffset = nStt (start is the higher position).
// Otherwise the search moves on with xBreak->previousWord().
//
// @param searchStr  text to search in
// @param startPos   position the backward search starts from (upper bound)
// @param endPos     end of the search range (lower bound)
// @throws RuntimeException (per the exception specification)
// NOTE(review): unlike ApproxSrchFrwrd, the loop condition below has no
// additional "endPos != nEnd" test (#i50244#) - confirm that a text with no
// word at all cannot loop forever here.
893 SearchResult
TextSearch::ApproxSrchBkwrd( const OUString
& searchStr
,
894 sal_Int32 startPos
, sal_Int32 endPos
)
895 throw(RuntimeException
)
898 aRet
.subRegExpressions
= 0;
903 OUString
aWTemp( searchStr
);
905 register sal_Int32 nStt
, nEnd
;
// Word boundary at startPos, ignoring white space.
907 Boundary aWBnd
= xBreak
->getWordBoundary( aWTemp
, startPos
,
909 WordType::ANYWORD_IGNOREWHITESPACES
, sal_True
);
// The current word ends at or before endPos.
// NOTE(review): the statement guarded by this test is not visible here -
// presumably the loop is left.
913 if( aWBnd
.endPos
<= endPos
)
// Clip the word to the search range.
915 nStt
= aWBnd
.startPos
< endPos
? endPos
: aWBnd
.startPos
;
916 nEnd
= aWBnd
.endPos
> startPos
? startPos
: aWBnd
.endPos
;
// NOTE(review): the beginning of this condition (original line 918) is not
// visible here.
919 pWLD
->WLD( aWTemp
.getStr() + nStt
, nEnd
- nStt
) <= nLimit
)
// Backward result: start and end are swapped.
921 aRet
.subRegExpressions
= 1;
922 aRet
.startOffset
.realloc( 1 );
923 aRet
.startOffset
[ 0 ] = nEnd
;
924 aRet
.endOffset
.realloc( 1 );
925 aRet
.endOffset
[ 0 ] = nStt
;
// Continue with the previous word.
931 aWBnd
= xBreak
->previousWord( aWTemp
, nStt
, aSrchPara
.Locale
,
932 WordType::ANYWORD_IGNOREWHITESPACES
);
933 } while( aWBnd
.startPos
!= aWBnd
.endPos
|| aWBnd
.endPos
!= aWTemp
.getLength() );
// Name of the service this component supports.
938 static const sal_Char cSearchName
[] = "com.sun.star.util.TextSearch";
// Implementation name of this component.
939 static const sal_Char cSearchImpl
[] = "com.sun.star.util.TextSearch_i18n";
// Returns the service name (cSearchName) as an OUString.
941 static OUString
getServiceName_Static()
943 return OUString::createFromAscii( cSearchName
);
// Returns the implementation name (cSearchImpl) as an OUString.
946 static OUString
getImplementationName_Static()
948 return OUString::createFromAscii( cSearchImpl
);
// Returns the implementation name of this component by forwarding to
// getImplementationName_Static().
952 TextSearch::getImplementationName()
953 throw( RuntimeException
)
955 return getImplementationName_Static();
// Returns true exactly when rServiceName compares equal to cSearchName
// (compareToAscii() yields 0 for equal strings, hence the negation).
959 TextSearch::supportsService(const OUString
& rServiceName
)
960 throw( RuntimeException
)
962 return !rServiceName
.compareToAscii( cSearchName
);
// Builds a one-element sequence holding the service name.
// NOTE(review): the return statement is not visible here; aRet is presumably
// the result.
965 Sequence
< OUString
> SAL_CALL
966 TextSearch::getSupportedServiceNames(void) throw( RuntimeException
)
968 Sequence
< OUString
> aRet(1);
969 aRet
[0] = getServiceName_Static();
// Factory function: creates a new TextSearch for the service factory rxMSF
// and returns it as an XInterface reference (via its OWeakObject base).
973 ::com::sun::star::uno::Reference
< ::com::sun::star::uno::XInterface
>
974 SAL_CALL
TextSearch_CreateInstance(
975 const ::com::sun::star::uno::Reference
<
976 ::com::sun::star::lang::XMultiServiceFactory
>& rxMSF
)
978 return ::com::sun::star::uno::Reference
<
979 ::com::sun::star::uno::XInterface
>(
980 (::cppu::OWeakObject
*) new TextSearch( rxMSF
) );
// Component entry point: reports the environment type name of this library by
// writing CPPU_CURRENT_LANGUAGE_BINDING_NAME to *ppEnvTypeName. The second
// parameter is unused.
986 void SAL_CALL
component_getImplementationEnvironment(
987 const sal_Char
** ppEnvTypeName
, uno_Environment
** /*ppEnv*/ )
989 *ppEnvTypeName
= CPPU_CURRENT_LANGUAGE_BINDING_NAME
;
// Component entry point: writes the registration data for this component.
// Under the given registry key it creates <implementation name>, below that
// "/UNO/SERVICES", and below that a key named after the service. The service
// manager parameter is unused.
// NOTE(review): the null check on _pRegistryKey, the error handling and the
// return value are not visible here.
992 sal_Bool SAL_CALL
component_writeInfo(
993 void* /*_pServiceManager*/, void* _pRegistryKey
)
997 ::com::sun::star::registry::XRegistryKey
* pRegistryKey
=
998 reinterpret_cast< ::com::sun::star::registry::XRegistryKey
* >(
1000 ::com::sun::star::uno::Reference
<
1001 ::com::sun::star::registry::XRegistryKey
> xNewKey
;
// Key chain: implementation name -> "/UNO/SERVICES" -> service name.
1003 xNewKey
= pRegistryKey
->createKey( getImplementationName_Static() );
1004 xNewKey
= xNewKey
->createKey(
1005 ::rtl::OUString::createFromAscii( "/UNO/SERVICES" ) );
1006 xNewKey
->createKey( getServiceName_Static() );
// Component entry point: returns the factory for the requested implementation.
// If sImplementationName equals cSearchImpl, a single-service factory for
// TextSearch_CreateInstance is created. A valid factory is acquire()d and its
// raw pointer is stored in pRet. The registry key parameter is unused.
// NOTE(review): the declaration of pRet and the return statement are not
// visible here; presumably pRet is returned and is null for other names.
1011 void* SAL_CALL
component_getFactory( const sal_Char
* sImplementationName
,
1012 void* _pServiceManager
, void* /*_pRegistryKey*/ )
1016 ::com::sun::star::lang::XMultiServiceFactory
* pServiceManager
=
1017 reinterpret_cast< ::com::sun::star::lang::XMultiServiceFactory
* >
1018 ( _pServiceManager
);
1019 ::com::sun::star::uno::Reference
<
1020 ::com::sun::star::lang::XSingleServiceFactory
> xFactory
;
// Only the implementation name of this component is served.
1022 if ( 0 == rtl_str_compare( sImplementationName
, cSearchImpl
) )
1024 ::com::sun::star::uno::Sequence
< ::rtl::OUString
> aServiceNames(1);
1025 aServiceNames
[0] = getServiceName_Static();
1026 xFactory
= ::cppu::createSingleFactory(
1027 pServiceManager
, getImplementationName_Static(),
1028 &TextSearch_CreateInstance
, aServiceNames
);
// acquire() is called on the factory before its raw pointer is stored in pRet.
1031 if ( xFactory
.is() )
1033 xFactory
->acquire();
1034 pRet
= xFactory
.get();