Branch libreoffice-24-8-3
[LibreOffice.git] / external / hunspell / 0001-Keep-only-REP-ph-or-2-word-dictionary-phrase-suggest.patch
blob2e903a34e42ae76fe1c775da4b7cd5ca1b14b3d7
1 From b88f9ea57bdb9b219f3c1d2c67f4f882f1f23194 Mon Sep 17 00:00:00 2001
2 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20N=C3=A9meth?= <nemeth@numbertext.org>
3 Date: Sun, 14 May 2023 22:15:15 +0200
4 Subject: [PATCH] Keep only REP, ph: or 2-word dictionary phrase suggestions
6 These are the best suggestions, so there is no need to search for
7 other ones; this avoids an annoyingly long and redundant list.
9 For example, to suggest only "a lot" for the bad form "alot",
10 add the 2-word phrase "a lot" to the dic file.
12 Or, for a very typical spelling mistake, it is enough to specify the
13 bad form with a ph: field in the dictionary file to remove the other
14 suggestions.
16 Note: partial revert of commit de9fe28008eb0761c33bd83847f282602c599fda
17 "fix up some warnings seen with -Wall -Wextra".
18 ---
19 src/hunspell/atypes.hxx | 1 +
20 src/hunspell/suggestmgr.cxx | 31 ++++++++++++++++++++++++++-----
21 src/hunspell/suggestmgr.hxx | 2 +-
22 tests/ph.sug | 4 ++--
23 tests/rep.sug | 2 +-
24 5 files changed, 31 insertions(+), 9 deletions(-)
26 diff --git a/src/hunspell/atypes.hxx b/src/hunspell/atypes.hxx
27 index 7e5a5c0..6e3ed1b 100644
28 --- a/src/hunspell/atypes.hxx
29 +++ b/src/hunspell/atypes.hxx
30 @@ -82,6 +82,7 @@ static inline void HUNSPELL_WARNING(FILE*, const char*, ...) {}
31 #define SPELL_ORIGCAP (1 << 5)
32 #define SPELL_WARN (1 << 6)
33 #define SPELL_COMPOUND_2 (1 << 7) // permit only 2 dictionary words in the compound
34 +#define SPELL_BEST_SUG (1 << 8) // limit suggestions for the best ones, i.e. ph:
36 #define MINCPDLEN 3
37 #define MAXCOMPOUND 10
38 diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx
39 index 19a24f8..ba688aa 100644
40 --- a/src/hunspell/suggestmgr.cxx
41 +++ b/src/hunspell/suggestmgr.cxx
42 @@ -242,8 +242,11 @@ bool SuggestMgr::suggest(std::vector<std::string>& slst,
43 if ((slst.size() < maxSug) && (!cpdsuggest || (slst.size() < oldSug + maxcpdsugs))) {
44 size_t i = slst.size();
45 replchars(slst, word, cpdsuggest, info);
46 - if (slst.size() > i)
47 + if (slst.size() > i) {
48 good_suggestion = true;
49 + if (info & SPELL_BEST_SUG)
50 + return true;
51 + }
53 if (clock() > timelimit + TIMELIMIT_SUGGESTION)
54 return good_suggestion;
55 @@ -365,7 +368,10 @@ bool SuggestMgr::suggest(std::vector<std::string>& slst,
56 // we always suggest them, in despite of nosplitsugs, and
57 // drop compound word and other suggestions)
58 if (!cpdsuggest || (!nosplitsugs && slst.size() < oldSug + maxcpdsugs)) {
59 - good_suggestion = twowords(slst, word, cpdsuggest, good_suggestion, info);
60 + good_suggestion = twowords(slst, word, cpdsuggest, good_suggestion, info);
62 + if (info & SPELL_BEST_SUG)
63 + return true;
65 if (clock() > timelimit + TIMELIMIT_SUGGESTION)
66 return good_suggestion;
67 @@ -506,15 +512,23 @@ int SuggestMgr::replchars(std::vector<std::string>& wlst,
68 candidate.assign(word, 0, r);
69 candidate.append(entry.outstrings[type]);
70 candidate.append(word, r + entry.pattern.size(), std::string::npos);
71 + size_t sp = candidate.find(' ');
72 + size_t oldns = wlst.size();
73 testsug(wlst, candidate, cpdsuggest, NULL, NULL, info);
74 + if (oldns < wlst.size()) {
75 + int patlen = entry.pattern.size();
76 + int replen = entry.outstrings[type].size();
77 + // REP suggestions are the best, don't search other type of suggestions
78 + info |= SPELL_BEST_SUG;
79 + }
81 // check REP suggestions with space
82 - size_t sp = candidate.find(' ');
83 if (sp != std::string::npos) {
84 size_t prev = 0;
85 while (sp != std::string::npos) {
86 std::string prev_chunk = candidate.substr(prev, sp - prev);
87 if (checkword(prev_chunk, 0, NULL, NULL)) {
88 - size_t oldns = wlst.size();
89 + oldns = wlst.size();
90 std::string post_chunk = candidate.substr(sp + 1);
91 testsug(wlst, post_chunk, cpdsuggest, NULL, NULL, info);
92 if (oldns < wlst.size()) {
93 @@ -854,11 +868,15 @@ bool SuggestMgr::twowords(std::vector<std::string>& wlst,
94 // alot -> a lot, alto, slot...
95 *p = ' ';
96 if (!cpdsuggest && checkword(candidate, cpdsuggest, NULL, NULL)) {
97 + // best solution
98 + info |= SPELL_BEST_SUG;
100 // remove not word pair suggestions
101 if (!good) {
102 good = true;
103 wlst.clear();
106 wlst.insert(wlst.begin(), candidate);
109 @@ -867,6 +885,9 @@ bool SuggestMgr::twowords(std::vector<std::string>& wlst,
110 *p = '-';
112 if (!cpdsuggest && checkword(candidate, cpdsuggest, NULL, NULL)) {
113 + // best solution
114 + info |= SPELL_BEST_SUG;
116 // remove not word pair suggestions
117 if (!good) {
118 good = true;
119 diff --git a/tests/ph.sug b/tests/ph.sug
120 index 8daee56..ccd936e 100644
121 --- a/tests/ph.sug
122 +++ b/tests/ph.sug
123 @@ -1,11 +1,11 @@
124 a lot
125 -in spite, inspire
126 +in spite
127 what
128 what
129 Wednesday
130 Wednesday
131 Wednesday
132 Wednesday
133 -which, witch, winch, wish
134 +which, witch
135 Oh, my gosh!
136 OH, MY GOSH!
137 diff --git a/tests/rep.sug b/tests/rep.sug
138 index b48a5b8..424731c 100644
139 --- a/tests/rep.sug
140 +++ b/tests/rep.sug
141 @@ -5,4 +5,4 @@ a lot, lot
142 un alunno
144 vinte e un
145 -auto's, auto
146 +auto's
148 2.25.1