1 From cf0967951a25a2daa10a636092193af5c5497aa2 Mon Sep 17 00:00:00 2001
2 From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolanm@redhat.com>
3 Date: Fri, 10 Feb 2017 16:36:27 +0000
4 Subject: [PATCH 3/4] hoist string lowering from ngram to ngsuggest
6 only lower when we have to, and reuse scratch buffers
9 kcachegrind reports 830,529,143 -> 779,887,690 on
11 echo Hollo | valgrind --tool=callgrind ./src/tools/.libs/hunspell -d nl_NL
13 src/hunspell/suggestmgr.cxx | 143 +++++++++++++++++++++++++++++---------------
14 1 file changed, 95 insertions(+), 48 deletions(-)
16 diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx
17 index 54a474f..ea52707 100644
18 --- a/src/hunspell/suggestmgr.cxx
19 +++ b/src/hunspell/suggestmgr.cxx
20 @@ -1075,10 +1075,8 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
21 u8_u16(w_target, target);
24 - std::vector<w_char> w_entry;
26 std::vector<w_char> w_f;
27 - std::vector<w_char> w_target2;
29 for (size_t i = 0; i < rHMgr.size(); ++i) {
30 while (0 != (hp = rHMgr[i]->walk_hashtable(col, hp))) {
31 @@ -1091,13 +1089,24 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
36 - u8_u16(w_entry, HENTRY_WORD(hp));
37 - sc = ngram(3, w_word, w_entry, NGRAM_LONGER_WORSE + low) +
38 - leftcommonsubstring(w_word, w_entry);
40 + u8_u16(w_f, HENTRY_WORD(hp));
42 + int leftcommon = leftcommonsubstring(w_word, w_f);
44 + // lowering dictionary word
45 + mkallsmall_utf(w_f, langnum);
47 + sc = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE) + leftcommon;
49 - sc = ngram(3, word, HENTRY_WORD(hp), NGRAM_LONGER_WORSE + low) +
50 - leftcommonsubstring(word, HENTRY_WORD(hp));
51 + f.assign(HENTRY_WORD(hp));
53 + int leftcommon = leftcommonsubstring(word, f.c_str());
55 + // lowering dictionary word
56 + mkallsmall(f, csconv);
58 + sc = ngram(3, word, f, NGRAM_LONGER_WORSE) + leftcommon;
61 // check special pronounciation
62 @@ -1108,11 +1117,20 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
66 - sc2 = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE + low) +
67 - leftcommonsubstring(w_word, w_f);
69 + int leftcommon = leftcommonsubstring(w_word, w_f);
71 + // lowering dictionary word
72 + mkallsmall_utf(w_f, langnum);
74 + sc2 = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE) + leftcommon;
76 - sc2 = ngram(3, word, f, NGRAM_LONGER_WORSE + low) +
77 - leftcommonsubstring(word, f.c_str());
78 + int leftcommon = leftcommonsubstring(word, f.c_str());
80 + // lowering dictionary word
81 + mkallsmall(f, csconv);
83 + sc2 = ngram(3, word, f, NGRAM_LONGER_WORSE) + leftcommon;
87 @@ -1129,14 +1147,14 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
88 candidate = HENTRY_WORD(hp);
89 mkallcap(candidate, csconv);
91 - std::string target2 = phonet(candidate, *ph);
93 + f = phonet(candidate, *ph);
96 - u8_u16(w_target2, target2);
97 - scphon = 2 * ngram(3, w_target, w_target2,
99 + scphon = 2 * ngram(3, w_target, w_f,
102 - scphon = 2 * ngram(3, target, target2,
103 + scphon = 2 * ngram(3, target, f,
107 @@ -1177,12 +1195,24 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
111 - thresh += ngram(n, w_word, w_mw, NGRAM_ANY_MISMATCH + low);
114 + // lowering dictionary word
115 + mkallsmall_utf(w_mw, langnum);
118 + thresh += ngram(n, w_word, w_mw, NGRAM_ANY_MISMATCH);
120 std::string mw = word;
121 for (int k = sp; k < n; k += 4)
123 - thresh += ngram(n, word, mw, NGRAM_ANY_MISMATCH + low);
126 + // lowering dictionary word
127 + mkallsmall(mw, csconv);
130 + thresh += ngram(n, word, mw, NGRAM_ANY_MISMATCH);
134 @@ -1210,7 +1240,6 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
138 - std::vector<w_char> w_glst_word;
139 for (int i = 0; i < MAX_ROOTS; i++) {
141 struct hentry* rp = roots[i];
142 @@ -1225,15 +1254,26 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
144 for (int k = 0; k < nw; k++) {
146 - w_glst_word.clear();
147 - u8_u16(w_glst_word, glst[k].word);
148 - sc = ngram(n, w_word, w_glst_word,
149 - NGRAM_ANY_MISMATCH + low) +
150 - leftcommonsubstring(w_word, w_glst_word);
152 + u8_u16(w_f, glst[k].word);
154 + int leftcommon = leftcommonsubstring(w_word, w_f);
156 + // lowering dictionary word
157 + mkallsmall_utf(w_f, langnum);
160 + sc = ngram(n, w_word, w_f, NGRAM_ANY_MISMATCH) + leftcommon;
162 - sc = ngram(n, word, glst[k].word,
163 - NGRAM_ANY_MISMATCH + low) +
164 - leftcommonsubstring(word, glst[k].word);
167 + int leftcommon = leftcommonsubstring(word, f.c_str());
169 + // lowering dictionary word
170 + mkallsmall(f, csconv);
173 + sc = ngram(n, word, f, NGRAM_ANY_MISMATCH) + leftcommon;
177 @@ -1318,19 +1358,37 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
181 - re = ngram(2, w_word, w_gl, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED) +
182 - ngram(2, w_gl, w_word, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED);
183 + //w_gl is lowercase already at this point
184 + re = ngram(2, w_word, w_gl, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
187 + // lowering dictionary word
188 + mkallsmall_utf(w_f, langnum);
189 + re += ngram(2, w_gl, w_f, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
191 + re += ngram(2, w_gl, w_word, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
194 - re = ngram(2, word, gl, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED) +
195 - ngram(2, gl, word, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED);
196 + //gl is lowercase already at this point
197 + re = ngram(2, word, gl, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
200 + // lowering dictionary word
201 + mkallsmall(f, csconv);
202 + re += ngram(2, gl, f, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
204 + re += ngram(2, gl, word, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED);
208 int ngram_score, leftcommon_score;
210 - ngram_score = ngram(4, w_word, w_gl, NGRAM_ANY_MISMATCH + low);
211 + //w_gl is lowercase already at this point
212 + ngram_score = ngram(4, w_word, w_gl, NGRAM_ANY_MISMATCH);
213 leftcommon_score = leftcommonsubstring(w_word, w_gl);
215 - ngram_score = ngram(4, word, gl, NGRAM_ANY_MISMATCH + low);
216 + //gl is lowercase already at this point
217 + ngram_score = ngram(4, word, gl, NGRAM_ANY_MISMATCH);
218 leftcommon_score = leftcommonsubstring(word, gl.c_str());
221 @@ -1802,14 +1860,6 @@ int SuggestMgr::ngram(int n,
225 - // lowering dictionary word
226 - const std::vector<w_char>* p_su2 = &su2;
227 - std::vector<w_char> su2_copy;
228 - if (opt & NGRAM_LOWERING) {
230 - mkallsmall_utf(su2_copy, langnum);
233 for (int j = 1; j <= n; j++) {
235 for (int i = 0; i <= (l1 - j); i++) {
236 @@ -1817,7 +1867,7 @@ int SuggestMgr::ngram(int n,
237 for (int l = 0; l <= (l2 - j); l++) {
238 for (k = 0; k < j; k++) {
239 const w_char& c1 = su1[i + k];
240 - const w_char& c2 = (*p_su2)[l + k];
241 + const w_char& c2 = su2[l + k];
242 if ((c1.l != c2.l) || (c1.h != c2.h))
245 @@ -1862,14 +1912,11 @@ int SuggestMgr::ngram(int n,
250 - if (opt & NGRAM_LOWERING)
251 - mkallsmall(t, csconv);
252 for (int j = 1; j <= n; j++) {
254 for (int i = 0; i <= (l1 - j); i++) {
255 - //t is haystack, s1[i..i+j) is needle
256 - if (t.find(s1.c_str()+i, 0, j) != std::string::npos) {
257 + //s2 is haystack, s1[i..i+j) is needle
258 + if (s2.find(s1.c_str()+i, 0, j) != std::string::npos) {
260 } else if (opt & NGRAM_WEIGHTED) {