Don't preload rarely seen large images
[chromium-blink-merge.git] / third_party / cld / encodings / compact_lang_det / tote.cc
blob75e5051964b7470fb6b585106bb006e087b04cfb
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "encodings/compact_lang_det/tote.h"
6 #include <string.h> // memset
8 #include "encodings/compact_lang_det/win/cld_logging.h"
11 // Take a set of <key, value> pairs and tote them up.
12 // After explicitly sorting, retrieve top key, value pairs
13 Tote::Tote() {
14 gram_count_ = 0;
15 incr_count_ = 0;
16 byte_count_ = 0;
17 memset(key_, 0, sizeof(key_));
18 // No need to initialize values
21 Tote::~Tote() {
24 void Tote::Reinit() {
25 gram_count_ = 0;
26 incr_count_ = 0;
27 byte_count_ = 0;
28 memset(key_, 0, sizeof(key_));
29 // No need to initialize values
32 // Increment count of quadgrams/trigrams/unigrams scored
33 void Tote::AddGram() {
34 ++gram_count_;
37 // Three-way associative, guaranteeing that the largest two counts are always
38 // in the data structure. kMaxSize must be a multiple of 3, and is tied to the
39 // subscript calculations here, which are for 8 sets of 3-way associative
40 // buckets. The subscripts for set N are [N], [N+8], and [N+16] used in a
41 // slightly-weird way: The initial probe point is [N] or [N+8], whichever
42 // is specified by key mod 16. In most cases (nearly *all* cases except Latin
43 // script), this entry matches and we update/return. The second probe is
44 // the other of [N] and [N+8]. The third probe is only used as a fallback to
45 // these two, and is there only for the rare case that there are three or more
46 // languages with Language enum values equal mod 8, contending within the same
47 // bucket. This can only happen in Latin and (rarely) Cyrillic scripts, because
48 // the other scripts have fewer than 17 languages total.
49 // If you change kMaxSize, change the constants 7/8/15/16 below
50 void Tote::Add(uint8 ikey, int idelta) {
51 DCHECK(ikey != 0);
52 ++incr_count_;
54 // Look for existing entry
55 int sub0 = ikey & 15;
56 if (key_[sub0] == ikey) {
57 value_[sub0] += idelta;
58 return;
60 int sub1 = sub0 ^ 8;
61 if (key_[sub1] == ikey) {
62 value_[sub1] += idelta;
63 return;
65 int sub2 = (ikey & 7) + 16;
66 if (key_[sub2] == ikey) {
67 value_[sub2] += idelta;
68 return;
71 // Allocate new entry
72 int alloc = -1;
73 if (key_[sub0] == 0) {
74 alloc = sub0;
75 } else if (key_[sub1] == 0) {
76 alloc = sub1;
77 } else if (key_[sub2] == 0) {
78 alloc = sub2;
79 } else {
80 // All choices allocated, need to replace smallest one
81 alloc = sub0;
82 if (value_[sub1] < value_[alloc]) {alloc = sub1;}
83 if (value_[sub2] < value_[alloc]) {alloc = sub2;}
85 key_[alloc] = ikey;
86 value_[alloc] = idelta;
87 return;
90 // Return current top key
91 int Tote::CurrentTopKey() {
92 int top_key = 0;
93 int top_value = -1;
94 for (int sub = 0; sub < kMaxSize_; ++sub) {
95 if (key_[sub] == 0) {continue;}
96 if (top_value < value_[sub]) {
97 top_value = value_[sub];
98 top_key = key_[sub];
101 return top_key;
105 // Sort first n entries by decreasing order of value
106 // If key==0 other fields are not valid, treat value as -1
107 void Tote::Sort(int n) {
108 // This is n**2, but n is small
109 for (int sub = 0; sub < n; ++sub) {
110 if (key_[sub] == 0) {value_[sub] = -1;}
112 // Bubble sort key[sub] and entry[sub]
113 for (int sub2 = sub + 1; sub2 < kMaxSize_; ++sub2) {
114 if (key_[sub2] == 0) {value_[sub2] = -1;}
115 if (value_[sub] < value_[sub2]) {
116 // swap
117 uint8 tmpk = key_[sub];
118 key_[sub] = key_[sub2];
119 key_[sub2] = tmpk;
120 int tmpv = value_[sub];
121 value_[sub] = value_[sub2];
122 value_[sub2] = tmpv;
128 void Tote::Dump(FILE* f) {
129 for (int sub = 0; sub < kMaxSize_; ++sub) {
130 if (key_[sub] > 0) {
131 fprintf(f, "[%2d] %3d %8d\n", sub, key_[sub], value_[sub]);
134 fprintf(f, "%d %d %d\n", gram_count_, incr_count_, byte_count_);
140 // Take a set of <key, value> pairs and tote them up.
141 // After explicitly sorting, retrieve top key, value pairs
142 ToteWithReliability::ToteWithReliability() {
143 // No need to initialize score_ or value_
144 incr_count_ = 0;
145 sorted_ = 0;
146 memset(closepair_, 0, sizeof(closepair_));
147 memset(key_, 0, sizeof(key_));
150 ToteWithReliability::~ToteWithReliability() {
153 void ToteWithReliability::Reinit() {
154 // No need to initialize score_ or value_
155 incr_count_ = 0;
156 sorted_ = 0;
157 memset(closepair_, 0, sizeof(closepair_));
158 memset(key_, 0, sizeof(key_));
159 ////ss_.Init();
162 // Weight reliability by ibytes
163 // Also see three-way associative comments above for Tote
164 void ToteWithReliability::Add(uint8 ikey, int ibytes,
165 int score, int ireliability) {
166 DCHECK(ikey != 0);
167 CHECK(sorted_ == 0);
168 ++incr_count_;
170 // Look for existing entry
171 int sub0 = ikey & 15;
172 if (key_[sub0] == ikey) {
173 value_[sub0] += ibytes;
174 score_[sub0] += score;
175 reliability_[sub0] += ireliability * ibytes;
176 return;
178 int sub1 = sub0 ^ 8;
179 if (key_[sub1] == ikey) {
180 value_[sub1] += ibytes;
181 score_[sub1] += score;
182 reliability_[sub1] += ireliability * ibytes;
183 return;
185 int sub2 = (ikey & 7) + 16;
186 if (key_[sub2] == ikey) {
187 value_[sub2] += ibytes;
188 score_[sub2] += score;
189 reliability_[sub2] += ireliability * ibytes;
190 return;
193 // Allocate new entry
194 int alloc = -1;
195 if (key_[sub0] == 0) {
196 alloc = sub0;
197 } else if (key_[sub1] == 0) {
198 alloc = sub1;
199 } else if (key_[sub2] == 0) {
200 alloc = sub2;
201 } else {
202 // All choices allocated, need to replace smallest one
203 alloc = sub0;
204 if (value_[sub1] < value_[alloc]) {alloc = sub1;}
205 if (value_[sub2] < value_[alloc]) {alloc = sub2;}
207 key_[alloc] = ikey;
208 value_[alloc] = ibytes;
209 score_[alloc] = score;
210 reliability_[alloc] = ireliability * ibytes;
211 return;
214 // Find subscript of a given packed language, or -1
215 int ToteWithReliability::Find(uint8 ikey) {
216 DCHECK(ikey != 0);
218 if (sorted_) {
219 // Linear search if sorted
220 for (int sub = 0; sub < kMaxSize_; ++sub) {
221 if (key_[sub] == ikey) {return sub;}
223 return -1;
226 // Look for existing entry
227 int sub0 = ikey & 15;
228 if (key_[sub0] == ikey) {
229 return sub0;
231 int sub1 = sub0 ^ 8;
232 if (key_[sub1] == ikey) {
233 return sub1;
235 int sub2 = (ikey & 7) + 16;
236 if (key_[sub2] == ikey) {
237 return sub2;
240 return -1;
243 // Return current top key
244 int ToteWithReliability::CurrentTopKey() {
245 int top_key = 0;
246 int top_value = -1;
247 for (int sub = 0; sub < kMaxSize_; ++sub) {
248 if (key_[sub] == 0) {continue;}
249 if (top_value < value_[sub]) {
250 top_value = value_[sub];
251 top_key = key_[sub];
254 return top_key;
258 // Sort first n entries by decreasing order of value
259 // If key==0 other fields are not valid, treat value as -1
260 void ToteWithReliability::Sort(int n) {
261 // This is n**2, but n is small
262 for (int sub = 0; sub < n; ++sub) {
263 if (key_[sub] == 0) {value_[sub] = -1;}
265 // Bubble sort key[sub] and entry[sub]
266 for (int sub2 = sub + 1; sub2 < kMaxSize_; ++sub2) {
267 if (key_[sub2] == 0) {value_[sub2] = -1;}
268 if (value_[sub] < value_[sub2]) {
269 // swap
270 uint8 tmpk = key_[sub];
271 key_[sub] = key_[sub2];
272 key_[sub2] = tmpk;
274 int tmpv = value_[sub];
275 value_[sub] = value_[sub2];
276 value_[sub2] = tmpv;
278 int tmps = score_[sub];
279 score_[sub] = score_[sub2];
280 score_[sub2] = tmps;
282 int tmpr = reliability_[sub];
283 reliability_[sub] = reliability_[sub2];
284 reliability_[sub2] = tmpr;
288 sorted_ = 1;
291 void ToteWithReliability::Dump(FILE* f) {
292 for (int sub = 0; sub < kMaxSize_; ++sub) {
293 if (key_[sub] > 0) {
294 fprintf(f, "[%2d] %3d %6d %5d %4d\n",
295 sub, key_[sub], value_[sub], score_[sub], reliability_[sub]);
298 fprintf(f, " %d#\n", incr_count_);