1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #ifndef INCLUDED_UNOTOOLS_TEXTSEARCH_HXX
21 #define INCLUDED_UNOTOOLS_TEXTSEARCH_HXX
23 #include <unotools/unotoolsdllapi.h>
24 #include <i18nlangtag/lang.h>
25 #include <rtl/ustring.hxx>
26 #include <com/sun/star/uno/Reference.h>
// Numeric thresholds exposed as preprocessor constants.
// NOTE(review): presumably tuning values for the similarity search helpers
// declared on TextSearch (weighted Levenshtein distance, short search
// strings) - confirm against the implementation before changing them.
30 #define WLD_THRESHOLD 3
31 #define SMALL_STRING_THRESHOLD 4
// Forward declarations: the types below are only declared here, not defined,
// so this header uses them by reference or pointer.
35 namespace com::sun::star::lang
{ struct Locale
; }
36 namespace com::sun::star::util
{ class XTextSearch2
; }
37 namespace com::sun::star::util
{ struct SearchResult
; }
40 struct SearchOptions2
;
42 enum class TransliterationFlags
;
47 // Utility class holding the parameters of a search (search string, search type, case sensitivity, wildcard settings)
48 class UNOTOOLS_DLLPUBLIC SearchParam
// Kind of search to perform. Unknown is explicitly given the value -1;
// Normal, Regexp and Wildcard take the default enumerator values 0, 1 and 2.
51 enum class SearchType
{ Normal
, Regexp
, Wildcard
, Unknown
= -1 };
53 /** Convert configuration and document boolean settings to SearchType.
54 If bWildcard is true it takes precedence over rbRegExp.
56 If rbRegExp is true and bWildcard is also true, rbRegExp is set to false to
57 adapt the caller's settings.
// Returns SearchType::Wildcard, or otherwise SearchType::Regexp /
// SearchType::Normal depending on rbRegExp.
59 static SearchType
ConvertToSearchType( bool bWildcard
, bool & rbRegExp
)
65 return SearchType::Wildcard
;
67 return rbRegExp
? SearchType::Regexp
: SearchType::Normal
;
70 /** Convert SearchType to configuration and document boolean settings.
// The function returns void and takes rbWildcard / rbRegExp by non-const
// reference, so the converted settings are delivered through those two
// parameters. SearchType::Wildcard and SearchType::Regexp are handled as
// separate cases.
72 static void ConvertToBool( const SearchType eSearchType
, bool& rbWildcard
, bool& rbRegExp
)
76 case SearchType::Wildcard
:
80 case SearchType::Regexp
:
92 OUString sSrchStr
; // the search string
94 SearchType m_eSrchType
; // search type: normal/regexp/wildcard
96 sal_uInt32 m_cWildEscChar
; // wildcard escape character
98 bool m_bCaseSense
: 1; // case-sensitive search (returned by IsCaseSensitive())
99 bool m_bWildMatchSel
: 1; // wildcard pattern must match entire selection
// Constructs search parameters from a search string and a search type.
// Defaults: bCaseSensitive = true, backslash as wildcard escape character,
// and bWildMatchSel = false.
102 SearchParam( const OUString
&rText
,
103 SearchType eSrchType
,
104 bool bCaseSensitive
= true,
105 sal_uInt32 cWildEscChar
= '\\',
106 bool bWildMatchSel
= false );
// Copy constructor.
108 SearchParam( const SearchParam
& );
// Read-only accessors returning the stored member values.
112 const OUString
& GetSrchStr() const { return sSrchStr
; }
113 SearchType
GetSrchType() const { return m_eSrchType
; }
115 bool IsCaseSensitive() const { return m_bCaseSense
; }
116 bool IsWildMatchSel() const { return m_bWildMatchSel
; }
118 // signed return for API use: the stored sal_uInt32 is cast to sal_Int32
119 sal_Int32
GetWildEscChar() const { return static_cast<sal_Int32
>(m_cWildEscChar
); }
122 // For use in SAL_DEBUG etc. Output format not guaranteed to be stable.
// Stream insertion for SearchParam::SearchType with one case label per
// enumerator (Normal, Regexp, Wildcard, Unknown). One output statement writes
// the numeric value of eType followed by '?' - presumably the fallback for
// values matching none of the cases (TODO confirm).
123 template<typename charT
, typename traits
>
124 inline std::basic_ostream
<charT
, traits
> & operator <<(std::basic_ostream
<charT
, traits
> & stream
, const SearchParam::SearchType
& eType
)
128 case SearchParam::SearchType::Normal
:
131 case SearchParam::SearchType::Regexp
:
134 case SearchParam::SearchType::Wildcard
:
137 case SearchParam::SearchType::Unknown
:
141 stream
<< static_cast<int>(eType
) << '?';
148 // Utility class for searching a substring in a string.
149 // The following metrics are supported
150 // - ordinary text (Boyer-Moore)
151 // - regular expressions
152 // - weighted Levenshtein distance
153 // - wildcards '*' and '?'
155 // This class allows forward and backward searching!
157 class UNOTOOLS_DLLPUBLIC TextSearch
// Static helper: returns an XTextSearch2 reference for the given search options.
159 static css::uno::Reference
< css::util::XTextSearch2
>
160 getXTextSearch( const i18nutil::SearchOptions2
& rPara
);
162 css::uno::Reference
< css::util::XTextSearch2
>
// Initialisation helper: takes the search parameters and a locale.
165 void Init( const SearchParam
& rParam
,
166 const css::lang::Locale
& rLocale
);
169 // rText is the string being searched for
170 // these first two CTORs are deprecated!
// Construct from a SearchParam plus a language ...
171 TextSearch( const SearchParam
& rPara
, LanguageType nLanguage
);
// ... from a SearchParam plus a CharClass ...
172 TextSearch( const SearchParam
& rPara
, const CharClass
& rCClass
);
// ... or directly from i18nutil::SearchOptions2.
174 TextSearch( const i18nutil::SearchOptions2
& rPara
);
177 /* search for the search string in the (selected) text:
178 rStr - the text in which we search
179 pStart - start position for the search
180 pEnd - end position for the search
182 RETURN values == true: something is found
183 - pStart start pos of the found text,
184 - pEnd end pos of the found text,
185 - pRes - the search result with all found
186 positions. Is only filled with more positions
187 if the regular expression handles groups.
189 == false: nothing found, pStart, pEnd unchanged.
191 Definitions: start pos always inclusive, end pos always exclusive!
192 The positions must always be in the right direction!
193 search forward: start <= end
194 search backward: end <= start
// Forward search in rStr. pStart / pEnd are in-out positions passed by
// pointer; pRes is optional and defaults to nullptr.
196 bool SearchForward( const OUString
&rStr
,
197 sal_Int32
* pStart
, sal_Int32
* pEnd
,
198 css::util::SearchResult
* pRes
= nullptr );
200 * @brief searchForward Search forward beginning from the start to the end
202 * @param rStr The text in which we search
203 * @return True if the search term is found in the text
// Overload that takes only the text: no position or result arguments.
205 bool searchForward( const OUString
&rStr
);
// Backward search in rStr, with the same parameter list as SearchForward;
// pRes is optional and defaults to nullptr.
206 bool SearchBackward( const OUString
&rStr
,
207 sal_Int32
* pStart
, sal_Int32
* pEnd
,
208 css::util::SearchResult
* pRes
= nullptr );
// Takes the search options together with the locale to apply.
// NOTE(review): presumably re-initialises the underlying searcher for
// rLocale - confirm in the implementation.
210 void SetLocale( const i18nutil::SearchOptions2
& rOpt
,
211 const css::lang::Locale
& rLocale
);
213 /* replace back references in the replace string by the sub expressions from the search result */
// rReplaceStr is taken by non-const reference and is the string that gets
// rewritten; rStr and rResult are read-only inputs.
214 static void ReplaceBackReferences( OUString
& rReplaceStr
, std::u16string_view rStr
, const css::util::SearchResult
& rResult
);
217 * @brief Search for a string in another one based on similarity
218 * @param rString The string we compare with
219 * @param rSearchString The search term
220 * @param rSimilarityScore The similarity score (sent by reference to be filled)
221 * @return True if the search term is found, false otherwise
// rSimilarityScore is an output parameter: a pair of two sal_Int32 values
// passed by non-const reference.
223 static bool SimilaritySearch(const OUString
& rString
, const OUString
& rSearchString
,
224 ::std::pair
<sal_Int32
, sal_Int32
>& rSimilarityScore
);
226 * @brief Get similarity score between two strings
227 * according to the length of the common substring and its position
228 * @param rString The string we compare with
229 * @param rSearchString The search term
230 * @param nInitialScore The initial score
231 * @param bFromStart True if the search is from the start
232 * @return Score if the search term is found in the text, -1 otherwise
// Note: nInitialScore is passed by non-const reference, so the callee is able
// to modify the caller's variable.
234 static sal_Int32
GetSubstringSimilarity(std::u16string_view rString
,
235 std::u16string_view rSearchString
,
236 sal_Int32
& nInitialScore
, const bool bFromStart
);
// NOTE(review): presumably returns the weighted Levenshtein distance between
// rString and rSearchString as a sal_Int32 - the exact meaning of the value
// is to be confirmed in the implementation.
237 static sal_Int32
GetWeightedLevenshteinDistance(const OUString
& rString
,
238 const OUString
& rSearchString
);
245 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */