tdf#163967 RTF: Pasting/loading hyperlink imports font color and underline
[LibreOffice.git] / include / unotools / textsearch.hxx
blob3b06c93549d574767a178944e6febf826df6254f
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #ifndef INCLUDED_UNOTOOLS_TEXTSEARCH_HXX
21 #define INCLUDED_UNOTOOLS_TEXTSEARCH_HXX
23 #include <unotools/unotoolsdllapi.h>
24 #include <i18nlangtag/lang.h>
25 #include <rtl/ustring.hxx>
26 #include <com/sun/star/uno/Reference.h>
28 #include <ostream>
30 #define WLD_THRESHOLD 3
31 #define SMALL_STRING_THRESHOLD 4
// Forward declarations: this header only uses these types by reference or
// pointer, so their full definitions are not needed here.
class CharClass;

namespace com::sun::star::lang { struct Locale; }
namespace com::sun::star::util { class XTextSearch2; }
namespace com::sun::star::util { struct SearchResult; }
namespace i18nutil {
    struct SearchOptions;
    struct SearchOptions2;
}
// Declared at global scope, i.e. after namespace i18nutil has been closed.
enum class TransliterationFlags;
44 namespace utl
47 // Utility class for searching
48 class UNOTOOLS_DLLPUBLIC SearchParam
50 public:
51 enum class SearchType { Normal, Regexp, Wildcard, Unknown = -1 };
53 /** Convert configuration and document boolean settings to SearchType.
54 If bWildcard is true it takes precedence over rbRegExp.
55 @param rbRegExp
56 If true and bWildcard is also true, rbRegExp is set to false to
57 adapt the caller's settings.
59 static SearchType ConvertToSearchType( bool bWildcard, bool & rbRegExp )
61 if (bWildcard)
63 if (rbRegExp)
64 rbRegExp = false;
65 return SearchType::Wildcard;
67 return rbRegExp ? SearchType::Regexp : SearchType::Normal;
70 /** Convert SearchType to configuration and document boolean settings.
72 static void ConvertToBool( const SearchType eSearchType, bool& rbWildcard, bool& rbRegExp )
74 switch (eSearchType)
76 case SearchType::Wildcard:
77 rbWildcard = true;
78 rbRegExp = false;
79 break;
80 case SearchType::Regexp:
81 rbWildcard = false;
82 rbRegExp = true;
83 break;
84 default:
85 rbWildcard = false;
86 rbRegExp = false;
87 break;
91 private:
92 OUString sSrchStr; // the search string
94 SearchType m_eSrchType; // search normal/regular/LevDist
96 sal_uInt32 m_cWildEscChar; // wildcard escape character
98 bool m_bCaseSense : 1;
99 bool m_bWildMatchSel : 1; // wildcard pattern must match entire selection
101 public:
102 SearchParam( const OUString &rText,
103 SearchType eSrchType,
104 bool bCaseSensitive = true,
105 sal_uInt32 cWildEscChar = '\\',
106 bool bWildMatchSel = false );
108 SearchParam( const SearchParam& );
110 ~SearchParam();
112 const OUString& GetSrchStr() const { return sSrchStr; }
113 SearchType GetSrchType() const { return m_eSrchType; }
115 bool IsCaseSensitive() const { return m_bCaseSense; }
116 bool IsWildMatchSel() const { return m_bWildMatchSel; }
118 // signed return for API use
119 sal_Int32 GetWildEscChar() const { return static_cast<sal_Int32>(m_cWildEscChar); }
122 // For use in SAL_DEBUG etc. Output format not guaranteed to be stable.
123 template<typename charT, typename traits>
124 inline std::basic_ostream<charT, traits> & operator <<(std::basic_ostream<charT, traits> & stream, const SearchParam::SearchType& eType)
126 switch (eType)
128 case SearchParam::SearchType::Normal:
129 stream << "N";
130 break;
131 case SearchParam::SearchType::Regexp:
132 stream << "RE";
133 break;
134 case SearchParam::SearchType::Wildcard:
135 stream << "WC";
136 break;
137 case SearchParam::SearchType::Unknown:
138 stream << "UNK";
139 break;
140 default:
141 stream << static_cast<int>(eType) << '?';
142 break;
145 return stream;
148 // Utility class for searching a substring in a string.
149 // The following metrics are supported
150 // - ordinary text (Bayer/Moore)
151 // - regular expressions
152 // - weighted Levenshtein distance
153 // - wildcards '*' and '?'
155 // This class allows forward and backward searching!
157 class UNOTOOLS_DLLPUBLIC TextSearch
159 static css::uno::Reference< css::util::XTextSearch2 >
160 getXTextSearch( const i18nutil::SearchOptions2& rPara );
162 css::uno::Reference < css::util::XTextSearch2 >
163 xTextSearch;
165 void Init( const SearchParam & rParam,
166 const css::lang::Locale& rLocale );
168 public:
169 // rText is the string being searched for
170 // this first two CTORs are deprecated!
171 TextSearch( const SearchParam & rPara, LanguageType nLanguage );
172 TextSearch( const SearchParam & rPara, const CharClass& rCClass );
174 TextSearch( const i18nutil::SearchOptions2& rPara );
175 ~TextSearch();
177 /* search in the (selected) text the search string:
178 rScrTxt - the text, in which we search
179 pStart - start position for the search
180 pEnd - end position for the search
182 RETURN values == true: something is found
183 - pStart start pos of the found text,
184 - pEnd end pos of the found text,
185 - pSrchResult - the search result with all found
186 positions. Is only filled with more positions
187 if the regular expression handles groups.
189 == false: nothing found, pStart, pEnd unchanged.
191 Definitions: start pos always inclusive, end pos always exclusive!
192 The position must always in the right direction!
193 search forward: start <= end
194 search backward: end <= start
196 bool SearchForward( const OUString &rStr,
197 sal_Int32* pStart, sal_Int32* pEnd,
198 css::util::SearchResult* pRes = nullptr );
200 * @brief searchForward Search forward beginning from the start to the end
201 * of the given text
202 * @param rStr The text in which we search
203 * @return True if the search term is found in the text
205 bool searchForward( const OUString &rStr );
206 bool SearchBackward( const OUString &rStr,
207 sal_Int32* pStart, sal_Int32* pEnd,
208 css::util::SearchResult* pRes = nullptr );
210 void SetLocale( const i18nutil::SearchOptions2& rOpt,
211 const css::lang::Locale& rLocale );
213 /* replace back references in the replace string by the sub expressions from the search result */
214 static void ReplaceBackReferences( OUString& rReplaceStr, std::u16string_view rStr, const css::util::SearchResult& rResult );
217 * @brief Search for a string in a another one based on similarity
218 * @param rString The string we compare with
219 * @param rSearchString The search term
220 * @param rSimilarityScore The similarity score (sent by reference to be filled)
221 * @return True if the search term is found, false otherwise
223 static bool SimilaritySearch(const OUString& rString, const OUString& rSearchString,
224 ::std::pair<sal_Int32, sal_Int32>& rSimilarityScore);
226 * @brief Get similarity score between two strings
227 * according to the length of the common substring and its position
228 * @param rString The string we compare with
229 * @param rSearchString The search term
230 * @param nInitialScore The initial score
231 * @param bFromStart True if the search is from the start
232 * @return Score if the search term is found in the text, -1 otherwise
234 static sal_Int32 GetSubstringSimilarity(std::u16string_view rString,
235 std::u16string_view rSearchString,
236 sal_Int32& nInitialScore, const bool bFromStart);
237 static sal_Int32 GetWeightedLevenshteinDistance(const OUString& rString,
238 const OUString& rSearchString);
241 } // namespace utl
243 #endif
245 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */