Popular sites on the NTP: re-download popular suggestions once per Chrome run
[chromium-blink-merge.git] / chrome / browser / spellchecker / spellcheck_custom_dictionary.cc
bloba0175c03ddade6bd5ff69725178281c2e5505575
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/spellchecker/spellcheck_custom_dictionary.h"
7 #include <functional>
9 #include "base/files/file_util.h"
10 #include "base/files/important_file_writer.h"
11 #include "base/md5.h"
12 #include "base/strings/string_number_conversions.h"
13 #include "base/strings/string_split.h"
14 #include "base/strings/string_util.h"
15 #include "chrome/browser/spellchecker/spellcheck_host_metrics.h"
16 #include "chrome/common/chrome_constants.h"
17 #include "chrome/common/spellcheck_common.h"
18 #include "content/public/browser/browser_thread.h"
19 #include "sync/api/sync_change.h"
20 #include "sync/api/sync_error_factory.h"
21 #include "sync/protocol/sync.pb.h"
23 using content::BrowserThread;
25 namespace {
27 // Filename extension for backup dictionary file.
28 const base::FilePath::CharType BACKUP_EXTENSION[] = FILE_PATH_LITERAL("backup");
30 // Prefix for the checksum in the dictionary file.
31 const char CHECKSUM_PREFIX[] = "checksum_v1 = ";
// Outcome of verifying the checksum of a custom spellcheck dictionary file.
enum ChecksumStatus {
  VALID_CHECKSUM = 0,
  INVALID_CHECKSUM = 1,
};
// Flags describing what a dictionary sanitation found. The non-zero values are
// distinct bits, so several of them can be OR-ed together into one int.
enum ChangeSanitationResult {
  // Nothing had to be dropped; the change can be applied as-is.
  VALID_CHANGE = 0,

  // Some of the words to be added were not valid.
  DETECTED_INVALID_WORDS = 1 << 0,

  // Some of the words to be added were already in the dictionary.
  DETECTED_DUPLICATE_WORDS = 1 << 1,

  // Some of the words to be removed were not in the dictionary.
  DETECTED_MISSING_WORDS = 1 << 2,
};
54 // Loads the file at |file_path| into the |words| container. If the file has a
55 // valid checksum, then returns ChecksumStatus::VALID. If the file has an
56 // invalid checksum, then returns ChecksumStatus::INVALID and clears |words|.
57 ChecksumStatus LoadFile(const base::FilePath& file_path,
58 std::set<std::string>* words) {
59 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
60 DCHECK(words);
61 words->clear();
62 std::string contents;
63 base::ReadFileToString(file_path, &contents);
64 size_t pos = contents.rfind(CHECKSUM_PREFIX);
65 if (pos != std::string::npos) {
66 std::string checksum = contents.substr(pos + strlen(CHECKSUM_PREFIX));
67 contents = contents.substr(0, pos);
68 if (checksum != base::MD5String(contents))
69 return INVALID_CHECKSUM;
72 std::vector<std::string> word_list = base::SplitString(
73 base::TrimWhitespaceASCII(contents, base::TRIM_ALL), "\n",
74 base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
75 words->insert(word_list.begin(), word_list.end());
76 return VALID_CHECKSUM;
79 // Returns true for valid custom dictionary words.
80 bool IsValidWord(const std::string& word) {
81 std::string tmp;
82 return !word.empty() &&
83 word.size() <=
84 chrome::spellcheck_common::MAX_CUSTOM_DICTIONARY_WORD_BYTES &&
85 base::IsStringUTF8(word) &&
86 base::TRIM_NONE ==
87 base::TrimWhitespaceASCII(word, base::TRIM_ALL, &tmp);
90 // Loads the custom spellcheck dictionary from |path| into |custom_words|. If
91 // the dictionary checksum is not valid, but backup checksum is valid, then
92 // restores the backup and loads that into |custom_words| instead. If the backup
93 // is invalid too, then clears |custom_words|. Must be called on the file
94 // thread.
95 void LoadDictionaryFileReliably(const base::FilePath& path,
96 std::set<std::string>* custom_words) {
97 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
98 DCHECK(custom_words);
99 // Load the contents and verify the checksum.
100 if (LoadFile(path, custom_words) == VALID_CHECKSUM)
101 return;
102 // Checksum is not valid. See if there's a backup.
103 base::FilePath backup = path.AddExtension(BACKUP_EXTENSION);
104 if (!base::PathExists(backup))
105 return;
106 // Load the backup and verify its checksum.
107 if (LoadFile(backup, custom_words) != VALID_CHECKSUM)
108 return;
109 // Backup checksum is valid. Restore the backup.
110 base::CopyFile(backup, path);
113 // Backs up the original dictionary, saves |custom_words| and its checksum into
114 // the custom spellcheck dictionary at |path|.
115 void SaveDictionaryFileReliably(const base::FilePath& path,
116 const std::set<std::string>& custom_words) {
117 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
118 std::stringstream content;
119 for (const std::string& word : custom_words)
120 content << word << '\n';
122 std::string checksum = base::MD5String(content.str());
123 content << CHECKSUM_PREFIX << checksum;
124 base::CopyFile(path, path.AddExtension(BACKUP_EXTENSION));
125 base::ImportantFileWriter::WriteFileAtomically(path, content.str());
128 // Removes duplicate and invalid words from |to_add| word list. Looks for
129 // duplicates in both |to_add| and |existing| word lists. Returns a bitmap of
130 // |ChangeSanitationResult| values.
131 int SanitizeWordsToAdd(const std::set<std::string>& existing,
132 std::set<std::string>* to_add) {
133 DCHECK(to_add);
134 // Do not add duplicate words.
135 std::set<std::string> new_words =
136 base::STLSetDifference<std::set<std::string>>(*to_add, existing);
137 int result = VALID_CHANGE;
138 if (to_add->size() != new_words.size())
139 result |= DETECTED_DUPLICATE_WORDS;
140 // Do not add invalid words.
141 std::set<std::string> valid_new_words;
142 for (const std::string& word : new_words) {
143 if (IsValidWord(word))
144 valid_new_words.insert(valid_new_words.end(), word);
146 if (valid_new_words.size() != new_words.size())
147 result |= DETECTED_INVALID_WORDS;
148 // Save the sanitized words to be added.
149 std::swap(*to_add, valid_new_words);
150 return result;
153 // Removes word from |to_remove| that are missing from |existing| word list and
154 // sorts |to_remove|. Returns a bitmap of |ChangeSanitationResult| values.
155 int SanitizeWordsToRemove(const std::set<std::string>& existing,
156 std::set<std::string>* to_remove) {
157 DCHECK(to_remove);
158 // Do not remove words that are missing from the dictionary.
159 std::set<std::string> found_words =
160 base::STLSetIntersection<std::set<std::string>>(existing, *to_remove);
161 int result = VALID_CHANGE;
162 if (to_remove->size() > found_words.size())
163 result |= DETECTED_MISSING_WORDS;
164 // Save the sanitized words to be removed.
165 std::swap(*to_remove, found_words);
166 return result;
169 } // namespace
171 SpellcheckCustomDictionary::Change::Change() {
174 SpellcheckCustomDictionary::Change::~Change() {
177 void SpellcheckCustomDictionary::Change::AddWord(const std::string& word) {
178 to_add_.insert(word);
181 void SpellcheckCustomDictionary::Change::AddWords(
182 const std::set<std::string>& words) {
183 to_add_.insert(words.begin(), words.end());
186 void SpellcheckCustomDictionary::Change::RemoveWord(const std::string& word) {
187 to_remove_.insert(word);
190 int SpellcheckCustomDictionary::Change::Sanitize(
191 const std::set<std::string>& words) {
192 int result = VALID_CHANGE;
193 if (!to_add_.empty())
194 result |= SanitizeWordsToAdd(words, &to_add_);
195 if (!to_remove_.empty())
196 result |= SanitizeWordsToRemove(words, &to_remove_);
197 return result;
200 SpellcheckCustomDictionary::SpellcheckCustomDictionary(
201 const base::FilePath& dictionary_directory_name)
202 : custom_dictionary_path_(
203 dictionary_directory_name.Append(chrome::kCustomDictionaryFileName)),
204 is_loaded_(false),
205 weak_ptr_factory_(this) {
208 SpellcheckCustomDictionary::~SpellcheckCustomDictionary() {
211 const std::set<std::string>& SpellcheckCustomDictionary::GetWords() const {
212 DCHECK_CURRENTLY_ON(BrowserThread::UI);
213 return words_;
216 bool SpellcheckCustomDictionary::AddWord(const std::string& word) {
217 DCHECK_CURRENTLY_ON(BrowserThread::UI);
218 scoped_ptr<Change> dictionary_change(new Change);
219 dictionary_change->AddWord(word);
220 int result = dictionary_change->Sanitize(GetWords());
221 Apply(*dictionary_change);
222 Notify(*dictionary_change);
223 Sync(*dictionary_change);
224 Save(dictionary_change.Pass());
225 return result == VALID_CHANGE;
228 bool SpellcheckCustomDictionary::RemoveWord(const std::string& word) {
229 DCHECK_CURRENTLY_ON(BrowserThread::UI);
230 scoped_ptr<Change> dictionary_change(new Change);
231 dictionary_change->RemoveWord(word);
232 int result = dictionary_change->Sanitize(GetWords());
233 Apply(*dictionary_change);
234 Notify(*dictionary_change);
235 Sync(*dictionary_change);
236 Save(dictionary_change.Pass());
237 return result == VALID_CHANGE;
240 bool SpellcheckCustomDictionary::HasWord(const std::string& word) const {
241 return !!words_.count(word);
244 void SpellcheckCustomDictionary::AddObserver(Observer* observer) {
245 DCHECK_CURRENTLY_ON(BrowserThread::UI);
246 DCHECK(observer);
247 observers_.AddObserver(observer);
250 void SpellcheckCustomDictionary::RemoveObserver(Observer* observer) {
251 DCHECK_CURRENTLY_ON(BrowserThread::UI);
252 DCHECK(observer);
253 observers_.RemoveObserver(observer);
256 bool SpellcheckCustomDictionary::IsLoaded() {
257 DCHECK_CURRENTLY_ON(BrowserThread::UI);
258 return is_loaded_;
261 bool SpellcheckCustomDictionary::IsSyncing() {
262 DCHECK_CURRENTLY_ON(BrowserThread::UI);
263 return !!sync_processor_.get();
266 void SpellcheckCustomDictionary::Load() {
267 DCHECK_CURRENTLY_ON(BrowserThread::UI);
268 BrowserThread::PostTaskAndReplyWithResult(
269 BrowserThread::FILE,
270 FROM_HERE,
271 base::Bind(&SpellcheckCustomDictionary::LoadDictionaryFile,
272 custom_dictionary_path_),
273 base::Bind(&SpellcheckCustomDictionary::OnLoaded,
274 weak_ptr_factory_.GetWeakPtr()));
277 syncer::SyncMergeResult SpellcheckCustomDictionary::MergeDataAndStartSyncing(
278 syncer::ModelType type,
279 const syncer::SyncDataList& initial_sync_data,
280 scoped_ptr<syncer::SyncChangeProcessor> sync_processor,
281 scoped_ptr<syncer::SyncErrorFactory> sync_error_handler) {
282 DCHECK_CURRENTLY_ON(BrowserThread::UI);
283 DCHECK(!sync_processor_.get());
284 DCHECK(!sync_error_handler_.get());
285 DCHECK(sync_processor.get());
286 DCHECK(sync_error_handler.get());
287 DCHECK_EQ(syncer::DICTIONARY, type);
288 sync_processor_ = sync_processor.Pass();
289 sync_error_handler_ = sync_error_handler.Pass();
291 // Build a list of words to add locally.
292 scoped_ptr<Change> to_change_locally(new Change);
293 for (const syncer::SyncData& data : initial_sync_data) {
294 DCHECK_EQ(syncer::DICTIONARY, data.GetDataType());
295 to_change_locally->AddWord(data.GetSpecifics().dictionary().word());
298 // Add as many as possible local words remotely.
299 to_change_locally->Sanitize(GetWords());
300 Change to_change_remotely;
301 to_change_remotely.AddWords(base::STLSetDifference<std::set<std::string>>(
302 words_, to_change_locally->to_add()));
304 // Add remote words locally.
305 Apply(*to_change_locally);
306 Notify(*to_change_locally);
307 Save(to_change_locally.Pass());
309 // Send local changes to the sync server.
310 syncer::SyncMergeResult result(type);
311 result.set_error(Sync(to_change_remotely));
312 return result;
315 void SpellcheckCustomDictionary::StopSyncing(syncer::ModelType type) {
316 DCHECK_CURRENTLY_ON(BrowserThread::UI);
317 DCHECK_EQ(syncer::DICTIONARY, type);
318 sync_processor_.reset();
319 sync_error_handler_.reset();
322 syncer::SyncDataList SpellcheckCustomDictionary::GetAllSyncData(
323 syncer::ModelType type) const {
324 DCHECK_CURRENTLY_ON(BrowserThread::UI);
325 DCHECK_EQ(syncer::DICTIONARY, type);
326 syncer::SyncDataList data;
327 std::string word;
328 size_t i = 0;
329 for (auto it = words_.begin();
330 it != words_.end() &&
331 i < chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS;
332 ++it, ++i) {
333 word = *it;
334 sync_pb::EntitySpecifics specifics;
335 specifics.mutable_dictionary()->set_word(word);
336 data.push_back(syncer::SyncData::CreateLocalData(word, word, specifics));
338 return data;
341 syncer::SyncError SpellcheckCustomDictionary::ProcessSyncChanges(
342 const tracked_objects::Location& from_here,
343 const syncer::SyncChangeList& change_list) {
344 DCHECK_CURRENTLY_ON(BrowserThread::UI);
345 scoped_ptr<Change> dictionary_change(new Change);
346 for (const syncer::SyncChange& change : change_list) {
347 DCHECK(change.IsValid());
348 const std::string& word =
349 change.sync_data().GetSpecifics().dictionary().word();
350 switch (change.change_type()) {
351 case syncer::SyncChange::ACTION_ADD:
352 dictionary_change->AddWord(word);
353 break;
354 case syncer::SyncChange::ACTION_DELETE:
355 dictionary_change->RemoveWord(word);
356 break;
357 case syncer::SyncChange::ACTION_UPDATE:
358 // Intentionally fall through.
359 case syncer::SyncChange::ACTION_INVALID:
360 return sync_error_handler_->CreateAndUploadError(
361 FROM_HERE,
362 "Processing sync changes failed on change type " +
363 syncer::SyncChange::ChangeTypeToString(change.change_type()));
367 dictionary_change->Sanitize(GetWords());
368 Apply(*dictionary_change);
369 Notify(*dictionary_change);
370 Save(dictionary_change.Pass());
372 return syncer::SyncError();
375 // static
376 scoped_ptr<std::set<std::string>>
377 SpellcheckCustomDictionary::LoadDictionaryFile(const base::FilePath& path) {
378 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
379 scoped_ptr<std::set<std::string>> words(new std::set<std::string>);
380 LoadDictionaryFileReliably(path, words.get());
381 if (!words->empty() &&
382 VALID_CHANGE !=
383 SanitizeWordsToAdd(std::set<std::string>(), words.get())) {
384 SaveDictionaryFileReliably(path, *words);
386 SpellCheckHostMetrics::RecordCustomWordCountStats(words->size());
387 return words;
390 // static
391 void SpellcheckCustomDictionary::UpdateDictionaryFile(
392 scoped_ptr<Change> dictionary_change,
393 const base::FilePath& path) {
394 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
395 DCHECK(dictionary_change);
397 if (dictionary_change->empty())
398 return;
400 std::set<std::string> custom_words;
401 LoadDictionaryFileReliably(path, &custom_words);
403 // Add words.
404 custom_words.insert(dictionary_change->to_add().begin(),
405 dictionary_change->to_add().end());
407 // Remove words and save the remainder.
408 SaveDictionaryFileReliably(path,
409 base::STLSetDifference<std::set<std::string>>(
410 custom_words, dictionary_change->to_remove()));
413 void SpellcheckCustomDictionary::OnLoaded(
414 scoped_ptr<std::set<std::string>> custom_words) {
415 DCHECK_CURRENTLY_ON(BrowserThread::UI);
416 DCHECK(custom_words);
417 Change dictionary_change;
418 dictionary_change.AddWords(*custom_words);
419 dictionary_change.Sanitize(GetWords());
420 Apply(dictionary_change);
421 Sync(dictionary_change);
422 is_loaded_ = true;
423 FOR_EACH_OBSERVER(Observer, observers_, OnCustomDictionaryLoaded());
426 void SpellcheckCustomDictionary::Apply(const Change& dictionary_change) {
427 DCHECK_CURRENTLY_ON(BrowserThread::UI);
428 if (!dictionary_change.to_add().empty()) {
429 words_.insert(dictionary_change.to_add().begin(),
430 dictionary_change.to_add().end());
432 if (!dictionary_change.to_remove().empty()) {
433 std::set<std::string> updated_words =
434 base::STLSetDifference<std::set<std::string>>(
435 words_, dictionary_change.to_remove());
436 std::swap(words_, updated_words);
440 void SpellcheckCustomDictionary::Save(scoped_ptr<Change> dictionary_change) {
441 DCHECK_CURRENTLY_ON(BrowserThread::UI);
442 BrowserThread::PostTask(
443 BrowserThread::FILE, FROM_HERE,
444 base::Bind(&SpellcheckCustomDictionary::UpdateDictionaryFile,
445 base::Passed(&dictionary_change), custom_dictionary_path_));
448 syncer::SyncError SpellcheckCustomDictionary::Sync(
449 const Change& dictionary_change) {
450 DCHECK_CURRENTLY_ON(BrowserThread::UI);
451 syncer::SyncError error;
452 if (!IsSyncing() || dictionary_change.empty())
453 return error;
455 // The number of words on the sync server should not exceed the limits.
456 int server_size = static_cast<int>(words_.size()) -
457 static_cast<int>(dictionary_change.to_add().size());
458 int max_upload_size = std::max(
460 static_cast<int>(
461 chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS) -
462 server_size);
463 int upload_size = std::min(
464 static_cast<int>(dictionary_change.to_add().size()),
465 max_upload_size);
467 syncer::SyncChangeList sync_change_list;
468 int i = 0;
470 for (auto it = dictionary_change.to_add().begin();
471 it != dictionary_change.to_add().end() && i < upload_size; ++it, ++i) {
472 const std::string& word = *it;
473 sync_pb::EntitySpecifics specifics;
474 specifics.mutable_dictionary()->set_word(word);
475 sync_change_list.push_back(syncer::SyncChange(
476 FROM_HERE, syncer::SyncChange::ACTION_ADD,
477 syncer::SyncData::CreateLocalData(word, word, specifics)));
480 for (const std::string& word : dictionary_change.to_remove()) {
481 sync_pb::EntitySpecifics specifics;
482 specifics.mutable_dictionary()->set_word(word);
483 sync_change_list.push_back(syncer::SyncChange(
484 FROM_HERE,
485 syncer::SyncChange::ACTION_DELETE,
486 syncer::SyncData::CreateLocalData(word, word, specifics)));
489 // Send the changes to the sync processor.
490 error = sync_processor_->ProcessSyncChanges(FROM_HERE, sync_change_list);
491 if (error.IsSet())
492 return error;
494 // Turn off syncing of this dictionary if the server already has the maximum
495 // number of words.
496 if (words_.size() > chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS)
497 StopSyncing(syncer::DICTIONARY);
499 return error;
502 void SpellcheckCustomDictionary::Notify(const Change& dictionary_change) {
503 DCHECK_CURRENTLY_ON(BrowserThread::UI);
504 if (!IsLoaded() || dictionary_change.empty())
505 return;
506 FOR_EACH_OBSERVER(Observer,
507 observers_,
508 OnCustomDictionaryChanged(dictionary_change));