1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #ifndef INCLUDED_UNOTOOLS_TEXTSEARCH_HXX
21 #define INCLUDED_UNOTOOLS_TEXTSEARCH_HXX
23 #include <unotools/unotoolsdllapi.h>
24 #include <i18nlangtag/lang.h>
25 #include <rtl/ustring.hxx>
26 #include <com/sun/star/uno/Reference.h>
// NOTE(review): "WLD" presumably abbreviates "weighted Levenshtein distance"
// (cf. TextSearch::GetWeightedLevenshteinDistance further down); this looks
// like the distance cutoff used by the similarity search -- confirm against
// the implementation file.
30 #define WLD_THRESHOLD 3
// NOTE(review): presumably the length threshold for treating a string as
// "small" in the similarity search -- confirm against the implementation file.
31 #define SMALL_STRING_THRESHOLD 4
// Forward declarations of the UNO types that the declarations in this header
// use only by reference, by pointer, or as a template argument.
35 namespace com::sun::star::lang
{ struct Locale
; }
36 namespace com::sun::star::util
{ class XTextSearch2
; }
37 namespace com::sun::star::util
{ struct SearchResult
; }
39 struct SearchOptions2
;
45 // Utility class for searching
46 class UNOTOOLS_DLLPUBLIC SearchParam
49 enum class SearchType
{ Normal
, Regexp
, Wildcard
, Unknown
= -1 };
51 /** Convert configuration and document boolean settings to SearchType.
52 If bWildcard is true it takes precedence over rbRegExp.
54 If rbRegExp is true and bWildcard is also true, rbRegExp is set to false to
55 adapt the caller's settings.
57 static SearchType
ConvertToSearchType( bool bWildcard
, bool & rbRegExp
)
63 return SearchType::Wildcard
;
65 return rbRegExp
? SearchType::Regexp
: SearchType::Normal
;
68 /** Convert SearchType to configuration and document boolean settings.
70 static void ConvertToBool( const SearchType eSearchType
, bool& rbWildcard
, bool& rbRegExp
)
74 case SearchType::Wildcard
:
78 case SearchType::Regexp
:
90 OUString sSrchStr
; // the search string
92 SearchType m_eSrchType
; // search type: normal/regexp/wildcard
94 sal_uInt32 m_cWildEscChar
; // wildcard escape character
96 bool m_bCaseSense
: 1;
97 bool m_bWildMatchSel
: 1; // wildcard pattern must match entire selection
100 SearchParam( const OUString
&rText
,
101 SearchType eSrchType
,
102 bool bCaseSensitive
= true,
103 sal_uInt32 cWildEscChar
= '\\',
104 bool bWildMatchSel
= false );
106 SearchParam( const SearchParam
& );
110 const OUString
& GetSrchStr() const { return sSrchStr
; }
111 SearchType
GetSrchType() const { return m_eSrchType
; }
113 bool IsCaseSensitive() const { return m_bCaseSense
; }
114 bool IsWildMatchSel() const { return m_bWildMatchSel
; }
116 // signed return for API use
117 sal_Int32
GetWildEscChar() const { return static_cast<sal_Int32
>(m_cWildEscChar
); }
120 // For use in SAL_DEBUG etc. Output format not guaranteed to be stable.
121 template<typename charT
, typename traits
>
122 inline std::basic_ostream
<charT
, traits
> & operator <<(std::basic_ostream
<charT
, traits
> & stream
, const SearchParam::SearchType
& eType
)
126 case SearchParam::SearchType::Normal
:
129 case SearchParam::SearchType::Regexp
:
132 case SearchParam::SearchType::Wildcard
:
135 case SearchParam::SearchType::Unknown
:
139 stream
<< static_cast<int>(eType
) << '?';
146 // Utility class for searching a substring in a string.
147 // The following metrics are supported
148 // - ordinary text (Boyer-Moore)
149 // - regular expressions
150 // - weighted Levenshtein distance
151 // - wildcards '*' and '?'
153 // This class allows forward and backward searching!
155 class UNOTOOLS_DLLPUBLIC TextSearch
157 static css::uno::Reference
< css::util::XTextSearch2
>
158 getXTextSearch( const i18nutil::SearchOptions2
& rPara
);
160 css::uno::Reference
< css::util::XTextSearch2
>
163 void Init( const SearchParam
& rParam
,
164 const css::lang::Locale
& rLocale
);
167 // rText is the string being searched for
168 // these first two CTORs are deprecated!
169 TextSearch( const SearchParam
& rPara
, LanguageType nLanguage
);
170 TextSearch( const SearchParam
& rPara
, const CharClass
& rCClass
);
172 TextSearch( const i18nutil::SearchOptions2
& rPara
);
175 /* search for the search string in the (selected) text:
176 rStr - the text in which we search
177 pStart - start position for the search
178 pEnd - end position for the search
180 RETURN values == true: something is found
181 - pStart start pos of the found text,
182 - pEnd end pos of the found text,
183 - pRes - the search result with all found
184 positions. Is only filled with more positions
185 if the regular expression handles groups.
187 == false: nothing found, pStart, pEnd unchanged.
189 Definitions: start pos always inclusive, end pos always exclusive!
190 The positions must always be in the right direction!
191 search forward: start <= end
192 search backward: end <= start
194 bool SearchForward( const OUString
&rStr
,
195 sal_Int32
* pStart
, sal_Int32
* pEnd
,
196 css::util::SearchResult
* pRes
= nullptr );
198 * @brief searchForward Search forward beginning from the start to the end
200 * @param rStr The text in which we search
201 * @return True if the search term is found in the text
203 bool searchForward( const OUString
&rStr
);
204 bool SearchBackward( const OUString
&rStr
,
205 sal_Int32
* pStart
, sal_Int32
* pEnd
,
206 css::util::SearchResult
* pRes
= nullptr );
208 void SetLocale( const i18nutil::SearchOptions2
& rOpt
,
209 const css::lang::Locale
& rLocale
);
211 /* replace back references in the replace string by the sub expressions from the search result */
212 static void ReplaceBackReferences( OUString
& rReplaceStr
, std::u16string_view rStr
, const css::util::SearchResult
& rResult
);
215 * @brief Search for a string in another one based on similarity
216 * @param rString The string we compare with
217 * @param rSearchString The search term
218 * @param rSimilarityScore The similarity score (sent by reference to be filled)
219 * @return True if the search term is found, false otherwise
221 static bool SimilaritySearch(const OUString
& rString
, const OUString
& rSearchString
,
222 ::std::pair
<sal_Int32
, sal_Int32
>& rSimilarityScore
);
224 * @brief Get similarity score between two strings
225 * according to the length of the common substring and its position
226 * @param rString The string we compare with
227 * @param rSearchString The search term
228 * @param nInitialScore The initial score
229 * @param bFromStart True if the search is from the start
230 * @return Score if the search term is found in the text, -1 otherwise
232 static sal_Int32
GetSubstringSimilarity(std::u16string_view rString
,
233 std::u16string_view rSearchString
,
234 sal_Int32
& nInitialScore
, const bool bFromStart
);
235 static sal_Int32
GetWeightedLevenshteinDistance(const OUString
& rString
,
236 const OUString
& rSearchString
);
243 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */