Use IID_PPV_ARGS and CComPtr to simplify
[LibreOffice.git] / include / unotools / textsearch.hxx
blob019ff6f1ac8278d1b5d632c55062c158a888b1f6
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #ifndef INCLUDED_UNOTOOLS_TEXTSEARCH_HXX
21 #define INCLUDED_UNOTOOLS_TEXTSEARCH_HXX
23 #include <unotools/unotoolsdllapi.h>
24 #include <i18nlangtag/lang.h>
25 #include <rtl/ustring.hxx>
26 #include <com/sun/star/uno/Reference.h>
28 #include <ostream>
30 #define WLD_THRESHOLD 3
31 #define SMALL_STRING_THRESHOLD 4
33 class CharClass;
35 namespace com::sun::star::lang { struct Locale; }
36 namespace com::sun::star::util { class XTextSearch2; }
37 namespace com::sun::star::util { struct SearchResult; }
38 namespace i18nutil {
39 struct SearchOptions2;
42 namespace utl
45 // Utility class for searching
46 class UNOTOOLS_DLLPUBLIC SearchParam
48 public:
49 enum class SearchType { Normal, Regexp, Wildcard, Unknown = -1 };
51 /** Convert configuration and document boolean settings to SearchType.
52 If bWildcard is true it takes precedence over rbRegExp.
53 @param rbRegExp
54 If true and bWildcard is also true, rbRegExp is set to false to
55 adapt the caller's settings.
57 static SearchType ConvertToSearchType( bool bWildcard, bool & rbRegExp )
59 if (bWildcard)
61 if (rbRegExp)
62 rbRegExp = false;
63 return SearchType::Wildcard;
65 return rbRegExp ? SearchType::Regexp : SearchType::Normal;
68 /** Convert SearchType to configuration and document boolean settings.
70 static void ConvertToBool( const SearchType eSearchType, bool& rbWildcard, bool& rbRegExp )
72 switch (eSearchType)
74 case SearchType::Wildcard:
75 rbWildcard = true;
76 rbRegExp = false;
77 break;
78 case SearchType::Regexp:
79 rbWildcard = false;
80 rbRegExp = true;
81 break;
82 default:
83 rbWildcard = false;
84 rbRegExp = false;
85 break;
89 private:
90 OUString sSrchStr; // the search string
92 SearchType m_eSrchType; // search normal/regular/LevDist
94 sal_uInt32 m_cWildEscChar; // wildcard escape character
96 bool m_bCaseSense : 1;
97 bool m_bWildMatchSel : 1; // wildcard pattern must match entire selection
99 public:
100 SearchParam( const OUString &rText,
101 SearchType eSrchType,
102 bool bCaseSensitive = true,
103 sal_uInt32 cWildEscChar = '\\',
104 bool bWildMatchSel = false );
106 SearchParam( const SearchParam& );
108 ~SearchParam();
110 const OUString& GetSrchStr() const { return sSrchStr; }
111 SearchType GetSrchType() const { return m_eSrchType; }
113 bool IsCaseSensitive() const { return m_bCaseSense; }
114 bool IsWildMatchSel() const { return m_bWildMatchSel; }
116 // signed return for API use
117 sal_Int32 GetWildEscChar() const { return static_cast<sal_Int32>(m_cWildEscChar); }
120 // For use in SAL_DEBUG etc. Output format not guaranteed to be stable.
121 template<typename charT, typename traits>
122 inline std::basic_ostream<charT, traits> & operator <<(std::basic_ostream<charT, traits> & stream, const SearchParam::SearchType& eType)
124 switch (eType)
126 case SearchParam::SearchType::Normal:
127 stream << "N";
128 break;
129 case SearchParam::SearchType::Regexp:
130 stream << "RE";
131 break;
132 case SearchParam::SearchType::Wildcard:
133 stream << "WC";
134 break;
135 case SearchParam::SearchType::Unknown:
136 stream << "UNK";
137 break;
138 default:
139 stream << static_cast<int>(eType) << '?';
140 break;
143 return stream;
146 // Utility class for searching a substring in a string.
147 // The following metrics are supported
148 // - ordinary text (Bayer/Moore)
149 // - regular expressions
150 // - weighted Levenshtein distance
151 // - wildcards '*' and '?'
153 // This class allows forward and backward searching!
155 class UNOTOOLS_DLLPUBLIC TextSearch
157 static css::uno::Reference< css::util::XTextSearch2 >
158 getXTextSearch( const i18nutil::SearchOptions2& rPara );
160 css::uno::Reference < css::util::XTextSearch2 >
161 xTextSearch;
163 void Init( const SearchParam & rParam,
164 const css::lang::Locale& rLocale );
166 public:
167 // rText is the string being searched for
168 // this first two CTORs are deprecated!
169 TextSearch( const SearchParam & rPara, LanguageType nLanguage );
170 TextSearch( const SearchParam & rPara, const CharClass& rCClass );
172 TextSearch( const i18nutil::SearchOptions2& rPara );
173 ~TextSearch();
175 /* search in the (selected) text the search string:
176 rScrTxt - the text, in which we search
177 pStart - start position for the search
178 pEnd - end position for the search
180 RETURN values == true: something is found
181 - pStart start pos of the found text,
182 - pEnd end pos of the found text,
183 - pSrchResult - the search result with all found
184 positions. Is only filled with more positions
185 if the regular expression handles groups.
187 == false: nothing found, pStart, pEnd unchanged.
189 Definitions: start pos always inclusive, end pos always exclusive!
190 The position must always in the right direction!
191 search forward: start <= end
192 search backward: end <= start
194 bool SearchForward( const OUString &rStr,
195 sal_Int32* pStart, sal_Int32* pEnd,
196 css::util::SearchResult* pRes = nullptr );
198 * @brief searchForward Search forward beginning from the start to the end
199 * of the given text
200 * @param rStr The text in which we search
201 * @return True if the search term is found in the text
203 bool searchForward( const OUString &rStr );
204 bool SearchBackward( const OUString &rStr,
205 sal_Int32* pStart, sal_Int32* pEnd,
206 css::util::SearchResult* pRes = nullptr );
208 void SetLocale( const i18nutil::SearchOptions2& rOpt,
209 const css::lang::Locale& rLocale );
211 /* replace back references in the replace string by the sub expressions from the search result */
212 static void ReplaceBackReferences( OUString& rReplaceStr, std::u16string_view rStr, const css::util::SearchResult& rResult );
215 * @brief Search for a string in a another one based on similarity
216 * @param rString The string we compare with
217 * @param rSearchString The search term
218 * @param rSimilarityScore The similarity score (sent by reference to be filled)
219 * @return True if the search term is found, false otherwise
221 static bool SimilaritySearch(const OUString& rString, const OUString& rSearchString,
222 ::std::pair<sal_Int32, sal_Int32>& rSimilarityScore);
224 * @brief Get similarity score between two strings
225 * according to the length of the common substring and its position
226 * @param rString The string we compare with
227 * @param rSearchString The search term
228 * @param nInitialScore The initial score
229 * @param bFromStart True if the search is from the start
230 * @return Score if the search term is found in the text, -1 otherwise
232 static sal_Int32 GetSubstringSimilarity(std::u16string_view rString,
233 std::u16string_view rSearchString,
234 sal_Int32& nInitialScore, const bool bFromStart);
235 static sal_Int32 GetWeightedLevenshteinDistance(const OUString& rString,
236 const OUString& rSearchString);
239 } // namespace utl
241 #endif
243 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */