tdf#130857 qt weld: Implement QtInstanceWidget::strip_mnemonic
[LibreOffice.git] / i18npool / source / search / levdis.hxx
blobb378c03cff4eb1720a15c9271e992b66859d3d7f
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #pragma once
22 #include <sal/types.h>
23 #include <memory>
25 // Sensible default values for a user interface could be:
26 // LEVDISDEFAULT_XOTHER 2
27 // Maximum X replacements to match query, found data may be different by X
28 // characters from query.
29 // LEVDISDEFAULT_YSHORTER 1
30 // Maximum Y insertions to match query, found data may be Y characters
31 // shorter than query.
32 // LEVDISDEFAULT_ZLONGER 3
33 // Maximum Z deletions to match query, found data may be Z characters
34 // longer than query.
35 // LEVDISDEFAULT_RELAXED TRUE
36 // Use relaxed SplitCount instead of mathematical WLD.
38 // Joker/wildcards ('?' and '*') of course do not count as
39 // replacement/insertion/deletion. At a '?' a replacement is not counted, for a
40 // '*' the found data may be any number of characters longer than the query.
42 // Strict mathematical WLD: EITHER maximum X replacements OR Y characters
43 // shorter OR Z characters longer.
44 // Relaxed SplitCount: maximum X replacements AND/OR Y character shorter AND/OR
45 // Z characters longer. Any combination of actions is valid.
47 // The value range for X,Y,Z is 0..33 to keep the limit within a 16 bit signed
48 // integer, 31*32*33 is the maximum limit, LCM(31,32,33) == 32736.
50 // The corresponding internal default weigh values for these user interface
51 // values would be:
52 // LEVDISDEFAULTLIMIT 6
53 // Default nLimit, matches x=2, y=1, z=3, p=3, q=6, r=2
54 // LEVDISDEFAULT_P0 3
55 // Default nRepP0, weight of replacements.
56 // LEVDISDEFAULT_Q0 6
57 // Default nInsQ0, weight of insertions.
58 // LEVDISDEFAULT_R0 2
59 // Default nDelR0, weight of deletions.
61 // The transformation of user input values to weighs is done using CalcLPQR().
62 // One caveat, if the WLD reaches nLimit due to nDelR0 (i.e. data string is nZ
63 // characters longer than pattern) then no character can be replaced any more.
64 // This can be circumvented by increasing nX or/and nZ, but of course with the
65 // side effect of being less strict then... or the other solution is to use
66 // relaxed SplitCount (see below), which is the default when using CalcLPQR().
68 // Attention: shorter = WLD.Insert, longer = WLD.Delete
70 // View and counting is always from data string to pattern, a deletion means
71 // that something is deleted from data to match pattern.
73 // Deletions weigh less in this example because usually less is known than is
74 // searched for. Replacements get middle weight, for example because of
75 // misspellings. Insertions are expensive.
77 // Another example: P0 = 1, Q0 = 4, R0 = 4, Limit = 3
78 // Allowed are maximum 4 replacements, no insertion, no deletion.
79 // Matches the user interface values X = 3, Y = 0, Z = 0
81 // bSplitCount: if TRUE, Rep/Ins/Del are counted differently. The return value
82 // of WLD() then isn't necessarily the Levenshtein-Distance, but can be
83 // negative (-WLD) if the WLD is greater than nLimit but single values are
84 // within the limits.
85 // For the above default values that could mean: even if the found string is
86 // already 2 characters longer (nLongerZ), 3 replacements (nOtherX) can be made
87 // to reach pattern. Additionally, character swaps count as one replacement.
88 // Mathematically completely incorrect, but meets user expectations ;-)
90 // Explanation: in the real WLD all actions are withdrawn from a common 100%
91 // pool, if one gets all there's nothing left for others. With bSplitCount
92 // replacements have their own pool.
95 /** "Safe" memory allocation in ctor */
96 class WLevDisPatternMem
98 std::unique_ptr<sal_Unicode[]> cp;
99 std::unique_ptr<bool[]> bp;
100 public:
101 explicit WLevDisPatternMem( sal_Int32 s )
102 : cp(new sal_Unicode[s])
103 , bp(new bool[s])
107 sal_Unicode* GetcPtr() const { return cp.get(); }
108 bool* GetbPtr() const { return bp.get(); }
111 class WLevDisDistanceMem
113 std::unique_ptr<int[]> p;
114 public:
115 explicit WLevDisDistanceMem( size_t s )
117 NewMem(s);
119 int* GetPtr() const { return p.get(); }
120 int* NewMem( size_t s )
122 p.reset(new int[ s<3 ? 3 : s ]);
123 return p.get();
127 /** Weighted Levenshtein Distance (WLD)
129 For a more detailed explanation see documentation in
130 i18npool/source/search/levdis.hxx
132 class WLevDistance
134 sal_Int32 nPatternLen; ///< length of pattern
135 WLevDisPatternMem aPatMem; ///< manage allocation of pattern array
136 sal_Unicode* cpPattern; ///< pointer to pattern array
137 bool* bpPatIsWild; ///< pointer to bool array whether pattern is wildcard
138 sal_Int32 nArrayLen; ///< length of distance array
139 WLevDisDistanceMem aDisMem; ///< manage allocation of distance array
140 int* npDistance; ///< pointer to distance array
141 int nLimit; ///< WLD limit replacements/insertions/deletions
142 int nRepP0; ///< replacement weigh
143 int nInsQ0; ///< insertion weigh
144 int nDelR0; ///< deletion weigh
145 int nStars; ///< count of '*' wildcards in pattern
146 bool bSplitCount; ///< if TRUE, Rep/Ins/Del are counted separately
148 void InitData( const sal_Unicode* cPattern );
149 static int Mid3( int x, int y, int z ); ///< middle value of 3 values
151 public:
153 /** CTor with user input. Internally calls CalcLPQR().
155 After this, obtain the resulting limit using GetLimit().
157 @param bRelaxed the mathematically incorrect method is default (TRUE)
159 WLevDistance( const sal_Unicode* cPattern, int nOtherX, int nShorterY,
160 int nLongerZ, bool bRelaxed );
162 WLevDistance( const WLevDistance& rWLD );
163 ~WLevDistance();
165 /** Calculate the Weighted Levenshtein Distance from string to pattern. */
166 int WLD( const sal_Unicode* cString, sal_Int32 nStringLen );
168 /** Calculate the internal weighs corresponding to the user input values.
169 @returns nLimit for later comparison with WLD()
171 void CalcLPQR( int nOtherX, int nShorterY, int nLongerZ,
172 bool bRelaxed );
174 int GetLimit() const { return nLimit; }
176 // Calculate current balance, keep this inline for performance reasons!
177 // c == cpPattern[jj] == cString[ii]
178 // First seek up to found place, if the balance is still equal there then
179 // also compare after the found place.
180 int levdisbalance(sal_Int32 jj, sal_Int32 ii, sal_Unicode c, const sal_Unicode* cString, sal_Int32 nStringLen) const
182 int nBalance = 0;
184 if ( jj != ii )
186 sal_Int32 k;
187 if ( jj > 0 )
188 for ( k=0; k < jj; k++ )
189 if ( cpPattern[k] == c )
190 nBalance++;
191 if ( ii > 0 )
192 for ( k=0; k < ii; k++ )
193 if ( cString[k] == c )
194 nBalance--;
195 if ( !nBalance )
197 for ( k=jj+1; k < nPatternLen; k++ )
198 if ( cpPattern[k] == c )
199 nBalance++;
200 for ( k=ii+1; k < nStringLen; k++ )
201 if ( cString[k] == c )
202 nBalance--;
206 return nBalance;
211 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */