1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/spellchecker/spellcheck_custom_dictionary.h"
9 #include "base/file_util.h"
10 #include "base/files/important_file_writer.h"
12 #include "base/strings/string_number_conversions.h"
13 #include "base/strings/string_split.h"
14 #include "chrome/browser/spellchecker/spellcheck_host_metrics.h"
15 #include "chrome/common/chrome_constants.h"
16 #include "chrome/common/spellcheck_messages.h"
17 #include "content/public/browser/browser_thread.h"
18 #include "sync/api/sync_change.h"
19 #include "sync/api/sync_data.h"
20 #include "sync/api/sync_error_factory.h"
21 #include "sync/protocol/sync.pb.h"
23 using content::BrowserThread
;
24 using chrome::spellcheck_common::WordList
;
25 using chrome::spellcheck_common::WordSet
;
29 // Filename extension for backup dictionary file.
30 const base::FilePath::CharType BACKUP_EXTENSION
[] = FILE_PATH_LITERAL("backup");
// Marker that introduces the checksum record inside the dictionary file. The
// text that follows the last occurrence of this marker is the checksum.
const char CHECKSUM_PREFIX[] = "checksum_v1 = ";
35 // The status of the checksum in a custom spellcheck dictionary.
// Outcome of sanitizing a dictionary change. Every non-zero value occupies its
// own bit, so several outcomes can be OR-ed together into a single int.
enum ChangeSanitationResult {
  // Nothing had to be corrected; the change can be applied as-is.
  VALID_CHANGE = 0,

  // Some of the words to be added are not valid.
  DETECTED_INVALID_WORDS = 1 << 0,

  // Some of the words to be added are already in the dictionary.
  DETECTED_DUPLICATE_WORDS = 1 << 1,

  // Some of the words to be removed are not in the dictionary.
  DETECTED_MISSING_WORDS = 1 << 2,
};
56 // Loads the file at |file_path| into the |words| container. If the file has a
57 // valid checksum, then returns ChecksumStatus::VALID. If the file has an
58 // invalid checksum, then returns ChecksumStatus::INVALID and clears |words|.
59 ChecksumStatus
LoadFile(const base::FilePath
& file_path
, WordList
& words
) {
60 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
63 base::ReadFileToString(file_path
, &contents
);
64 size_t pos
= contents
.rfind(CHECKSUM_PREFIX
);
65 if (pos
!= std::string::npos
) {
66 std::string checksum
= contents
.substr(pos
+ strlen(CHECKSUM_PREFIX
));
67 contents
= contents
.substr(0, pos
);
68 if (checksum
!= base::MD5String(contents
))
69 return INVALID_CHECKSUM
;
71 TrimWhitespaceASCII(contents
, TRIM_ALL
, &contents
);
72 base::SplitString(contents
, '\n', &words
);
73 return VALID_CHECKSUM
;
76 // Returns true for invalid words and false for valid words.
// The visible conditions reject a word when it is not valid UTF-8
// (IsStringUTF8() fails) or when trimming ASCII whitespace from both of its
// ends would change it (TrimWhitespaceASCII() reports something other than
// TRIM_NONE).
// NOTE(review): the operand compared against MAX_CUSTOM_DICTIONARY_WORD_BYTES,
// one further operand of the || chain and the declaration of |tmp| are not
// visible in this excerpt -- presumably a limit on the word's size in bytes;
// confirm against the complete file.
77 bool IsInvalidWord(const std::string
& word
) {
79 return !IsStringUTF8(word
) ||
81 chrome::spellcheck_common::MAX_CUSTOM_DICTIONARY_WORD_BYTES
||
83 TRIM_NONE
!= TrimWhitespaceASCII(word
, TRIM_ALL
, &tmp
);
86 // Loads the custom spellcheck dictionary from |path| into |custom_words|. If
87 // the dictionary checksum is not valid, but backup checksum is valid, then
88 // restores the backup and loads that into |custom_words| instead. If the backup
89 // is invalid too, then clears |custom_words|. Must be called on the file
91 void LoadDictionaryFileReliably(WordList
& custom_words
,
92 const base::FilePath
& path
) {
93 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
94 // Load the contents and verify the checksum.
95 if (LoadFile(path
, custom_words
) == VALID_CHECKSUM
)
97 // Checksum is not valid. See if there's a backup.
98 base::FilePath backup
= path
.AddExtension(BACKUP_EXTENSION
);
99 if (!base::PathExists(backup
))
101 // Load the backup and verify its checksum.
102 if (LoadFile(backup
, custom_words
) != VALID_CHECKSUM
)
104 // Backup checksum is valid. Restore the backup.
105 base::CopyFile(backup
, path
);
108 // Backs up the original dictionary, saves |custom_words| and its checksum into
109 // the custom spellcheck dictionary at |path|.
110 void SaveDictionaryFileReliably(
111 const WordList
& custom_words
,
112 const base::FilePath
& path
) {
113 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
114 std::stringstream content
;
115 for (WordList::const_iterator it
= custom_words
.begin();
116 it
!= custom_words
.end();
118 content
<< *it
<< '\n';
120 std::string checksum
= base::MD5String(content
.str());
121 content
<< CHECKSUM_PREFIX
<< checksum
;
122 base::CopyFile(path
, path
.AddExtension(BACKUP_EXTENSION
));
123 base::ImportantFileWriter::WriteFileAtomically(path
, content
.str());
126 // Removes duplicate and invalid words from |to_add| word list and sorts it.
127 // Looks for duplicates in both |to_add| and |existing| word lists. Returns a
128 // bitmap of |ChangeSanitationResult| values.
129 int SanitizeWordsToAdd(const WordSet
& existing
, WordList
& to_add
) {
130 // Do not add duplicate words.
131 std::sort(to_add
.begin(), to_add
.end());
132 WordList new_words
= base::STLSetDifference
<WordList
>(to_add
, existing
);
133 new_words
.erase(std::unique(new_words
.begin(), new_words
.end()),
135 int result
= VALID_CHANGE
;
136 if (to_add
.size() != new_words
.size())
137 result
|= DETECTED_DUPLICATE_WORDS
;
138 // Do not add invalid words.
139 size_t size
= new_words
.size();
140 new_words
.erase(std::remove_if(new_words
.begin(),
144 if (size
!= new_words
.size())
145 result
|= DETECTED_INVALID_WORDS
;
146 // Save the sanitized words to be added.
147 std::swap(to_add
, new_words
);
151 // Removes word from |to_remove| that are missing from |existing| word list and
152 // sorts |to_remove|. Returns a bitmap of |ChangeSanitationResult| values.
153 int SanitizeWordsToRemove(const WordSet
& existing
, WordList
& to_remove
) {
154 // Do not remove words that are missing from the dictionary.
155 std::sort(to_remove
.begin(), to_remove
.end());
156 WordList found_words
;
157 std::set_intersection(existing
.begin(),
161 std::back_inserter(found_words
));
162 int result
= VALID_CHANGE
;
163 if (to_remove
.size() > found_words
.size())
164 result
|= DETECTED_MISSING_WORDS
;
165 // Save the sanitized words to be removed.
166 std::swap(to_remove
, found_words
);
173 SpellcheckCustomDictionary::Change::Change() {
176 SpellcheckCustomDictionary::Change::Change(
177 const SpellcheckCustomDictionary::Change
& other
)
178 : to_add_(other
.to_add()),
179 to_remove_(other
.to_remove()) {
182 SpellcheckCustomDictionary::Change::Change(const WordList
& to_add
)
186 SpellcheckCustomDictionary::Change::~Change() {
189 void SpellcheckCustomDictionary::Change::AddWord(const std::string
& word
) {
190 to_add_
.push_back(word
);
193 void SpellcheckCustomDictionary::Change::RemoveWord(const std::string
& word
) {
194 to_remove_
.push_back(word
);
197 int SpellcheckCustomDictionary::Change::Sanitize(const WordSet
& words
) {
198 int result
= VALID_CHANGE
;
199 if (!to_add_
.empty())
200 result
|= SanitizeWordsToAdd(words
, to_add_
);
201 if (!to_remove_
.empty())
202 result
|= SanitizeWordsToRemove(words
, to_remove_
);
// Read-only accessor that returns a WordList by const reference.
// NOTE(review): the body is not visible in this excerpt -- presumably it
// returns |to_add_|, the words this change adds; confirm.
206 const WordList
& SpellcheckCustomDictionary::Change::to_add() const {
// Read-only accessor that returns a WordList by const reference.
// NOTE(review): the body is not visible in this excerpt -- presumably it
// returns |to_remove_|, the words this change removes; confirm.
210 const WordList
& SpellcheckCustomDictionary::Change::to_remove() const {
214 bool SpellcheckCustomDictionary::Change::empty() const {
215 return to_add_
.empty() && to_remove_
.empty();
// Creates the dictionary object for the directory |path|. The dictionary file
// path is |path| with chrome::kCustomDictionaryFileName appended. Nothing is
// read from disk here.
// NOTE(review): one entry of the initializer list is not visible in this
// excerpt; confirm against the complete file.
218 SpellcheckCustomDictionary::SpellcheckCustomDictionary(
219 const base::FilePath
& path
)
220 : custom_dictionary_path_(),
222 weak_ptr_factory_(this) {
223 custom_dictionary_path_
=
224 path
.Append(chrome::kCustomDictionaryFileName
);
// Destructor. NOTE(review): the body is not visible in this excerpt.
227 SpellcheckCustomDictionary::~SpellcheckCustomDictionary() {
// Read-only accessor that returns a WordSet by const reference. Must be called
// on the UI thread.
// NOTE(review): the return statement is not visible in this excerpt --
// presumably it returns |words_|; confirm.
230 const WordSet
& SpellcheckCustomDictionary::GetWords() const {
231 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI
));
235 bool SpellcheckCustomDictionary::AddWord(const std::string
& word
) {
236 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI
));
237 Change dictionary_change
;
238 dictionary_change
.AddWord(word
);
239 int result
= dictionary_change
.Sanitize(GetWords());
240 Apply(dictionary_change
);
241 Notify(dictionary_change
);
242 Sync(dictionary_change
);
243 Save(dictionary_change
);
244 return result
== VALID_CHANGE
;
247 bool SpellcheckCustomDictionary::RemoveWord(const std::string
& word
) {
248 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI
));
249 Change dictionary_change
;
250 dictionary_change
.RemoveWord(word
);
251 int result
= dictionary_change
.Sanitize(GetWords());
252 Apply(dictionary_change
);
253 Notify(dictionary_change
);
254 Sync(dictionary_change
);
255 Save(dictionary_change
);
256 return result
== VALID_CHANGE
;
259 bool SpellcheckCustomDictionary::HasWord(const std::string
& word
) const {
260 return !!words_
.count(word
);
263 void SpellcheckCustomDictionary::AddObserver(Observer
* observer
) {
264 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI
));
265 observers_
.AddObserver(observer
);
268 void SpellcheckCustomDictionary::RemoveObserver(Observer
* observer
) {
269 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI
));
270 observers_
.RemoveObserver(observer
);
// Reports whether the dictionary has been loaded. Must be called on the UI
// thread.
// NOTE(review): the returned expression is not visible in this excerpt;
// confirm which state it reads.
273 bool SpellcheckCustomDictionary::IsLoaded() {
274 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI
));
278 bool SpellcheckCustomDictionary::IsSyncing() {
279 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI
));
280 return !!sync_processor_
.get();
// Starts loading the dictionary. Must be called on the UI thread. Posts
// LoadDictionaryFile(custom_dictionary_path_) as a task and passes its result
// to OnLoaded(). The reply is bound to a weak pointer to this object.
// NOTE(review): the leading arguments of PostTaskAndReplyWithResult() are not
// visible in this excerpt. LoadDictionaryFile() asserts the file thread, so
// presumably the task is posted there; confirm.
283 void SpellcheckCustomDictionary::Load() {
284 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI
));
285 BrowserThread::PostTaskAndReplyWithResult(
288 base::Bind(&SpellcheckCustomDictionary::LoadDictionaryFile
,
289 custom_dictionary_path_
),
290 base::Bind(&SpellcheckCustomDictionary::OnLoaded
,
291 weak_ptr_factory_
.GetWeakPtr()));
294 syncer::SyncMergeResult
SpellcheckCustomDictionary::MergeDataAndStartSyncing(
295 syncer::ModelType type
,
296 const syncer::SyncDataList
& initial_sync_data
,
297 scoped_ptr
<syncer::SyncChangeProcessor
> sync_processor
,
298 scoped_ptr
<syncer::SyncErrorFactory
> sync_error_handler
) {
299 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI
));
300 DCHECK(!sync_processor_
.get());
301 DCHECK(!sync_error_handler_
.get());
302 DCHECK(sync_processor
.get());
303 DCHECK(sync_error_handler
.get());
304 DCHECK_EQ(syncer::DICTIONARY
, type
);
305 sync_processor_
= sync_processor
.Pass();
306 sync_error_handler_
= sync_error_handler
.Pass();
308 // Build a list of words to add locally.
309 WordList to_add_locally
;
310 for (syncer::SyncDataList::const_iterator it
= initial_sync_data
.begin();
311 it
!= initial_sync_data
.end();
313 DCHECK_EQ(syncer::DICTIONARY
, it
->GetDataType());
314 to_add_locally
.push_back(it
->GetSpecifics().dictionary().word());
317 // Add remote words locally.
318 Change
to_change_locally(to_add_locally
);
319 to_change_locally
.Sanitize(GetWords());
320 Apply(to_change_locally
);
321 Notify(to_change_locally
);
322 Save(to_change_locally
);
324 // Add as many as possible local words remotely.
325 std::sort(to_add_locally
.begin(), to_add_locally
.end());
326 WordList to_add_remotely
= base::STLSetDifference
<WordList
>(words_
,
329 // Send local changes to the sync server.
330 Change
to_change_remotely(to_add_remotely
);
331 syncer::SyncMergeResult
result(type
);
332 result
.set_error(Sync(to_change_remotely
));
336 void SpellcheckCustomDictionary::StopSyncing(syncer::ModelType type
) {
337 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI
));
338 DCHECK_EQ(syncer::DICTIONARY
, type
);
339 sync_processor_
.reset();
340 sync_error_handler_
.reset();
343 syncer::SyncDataList
SpellcheckCustomDictionary::GetAllSyncData(
344 syncer::ModelType type
) const {
345 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI
));
346 DCHECK_EQ(syncer::DICTIONARY
, type
);
347 syncer::SyncDataList data
;
350 for (WordSet::const_iterator it
= words_
.begin();
351 it
!= words_
.end() &&
352 i
< chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS
;
355 sync_pb::EntitySpecifics specifics
;
356 specifics
.mutable_dictionary()->set_word(word
);
357 data
.push_back(syncer::SyncData::CreateLocalData(word
, word
, specifics
));
// Applies the changes in |change_list| to the local dictionary. Must be called
// on the UI thread. ACTION_ADD entries become words to add and ACTION_DELETE
// entries become words to remove; the collected change is then sanitized
// against the current words, applied, announced to observers and saved. No
// call to Sync() is visible here. Returns a default-constructed
// syncer::SyncError at the end of the function. The other visible return
// creates an error through |sync_error_handler_| whose message names the
// change type of the current entry.
// NOTE(review): the lines between the case bodies, the label in front of the
// error return and the first argument of CreateAndUploadError() are not
// visible in this excerpt -- presumably `break;` statements and a `default:`
// label; confirm. |from_here| is not used in the visible lines.
362 syncer::SyncError
SpellcheckCustomDictionary::ProcessSyncChanges(
363 const tracked_objects::Location
& from_here
,
364 const syncer::SyncChangeList
& change_list
) {
365 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI
));
366 Change dictionary_change
;
// Collect every add/delete of |change_list| into one dictionary change.
367 for (syncer::SyncChangeList::const_iterator it
= change_list
.begin();
368 it
!= change_list
.end();
370 DCHECK(it
->IsValid());
371 std::string word
= it
->sync_data().GetSpecifics().dictionary().word();
372 switch (it
->change_type()) {
373 case syncer::SyncChange::ACTION_ADD
:
374 dictionary_change
.AddWord(word
);
376 case syncer::SyncChange::ACTION_DELETE
:
377 dictionary_change
.RemoveWord(word
);
// Returning here leaves the function before the collected change is applied.
380 return sync_error_handler_
->CreateAndUploadError(
382 "Processing sync changes failed on change type " +
383 syncer::SyncChange::ChangeTypeToString(it
->change_type()));
// Sanitize against the current words, then apply, notify and save.
387 dictionary_change
.Sanitize(GetWords());
388 Apply(dictionary_change
);
389 Notify(dictionary_change
);
390 Save(dictionary_change
);
392 return syncer::SyncError();
396 WordList
SpellcheckCustomDictionary::LoadDictionaryFile(
397 const base::FilePath
& path
) {
398 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
400 LoadDictionaryFileReliably(words
, path
);
401 if (!words
.empty() && VALID_CHANGE
!= SanitizeWordsToAdd(WordSet(), words
))
402 SaveDictionaryFileReliably(words
, path
);
403 SpellCheckHostMetrics::RecordCustomWordCountStats(words
.size());
408 void SpellcheckCustomDictionary::UpdateDictionaryFile(
409 const SpellcheckCustomDictionary::Change
& dictionary_change
,
410 const base::FilePath
& path
) {
411 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
412 if (dictionary_change
.empty())
415 WordList custom_words
;
416 LoadDictionaryFileReliably(custom_words
, path
);
419 custom_words
.insert(custom_words
.end(),
420 dictionary_change
.to_add().begin(),
421 dictionary_change
.to_add().end());
424 std::sort(custom_words
.begin(), custom_words
.end());
426 base::STLSetDifference
<WordList
>(custom_words
,
427 dictionary_change
.to_remove());
428 std::swap(custom_words
, remaining
);
430 SaveDictionaryFileReliably(custom_words
, path
);
// Receives the words that were read from the dictionary file. Must be called
// on the UI thread. Treats |custom_words| as words to add: the change is
// sanitized against the current words, applied and passed to Sync(). Finally
// every observer gets OnCustomDictionaryLoaded(). Notify() and Save() are not
// called in the visible lines.
// NOTE(review): one statement between the Sync() call and the observer
// notification is not visible in this excerpt -- presumably it records that
// loading has finished; confirm.
433 void SpellcheckCustomDictionary::OnLoaded(WordList custom_words
) {
434 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI
));
435 Change
dictionary_change(custom_words
);
436 dictionary_change
.Sanitize(GetWords());
437 Apply(dictionary_change
);
438 Sync(dictionary_change
);
440 FOR_EACH_OBSERVER(Observer
, observers_
, OnCustomDictionaryLoaded());
443 void SpellcheckCustomDictionary::Apply(
444 const SpellcheckCustomDictionary::Change
& dictionary_change
) {
445 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI
));
446 if (!dictionary_change
.to_add().empty()) {
447 words_
.insert(dictionary_change
.to_add().begin(),
448 dictionary_change
.to_add().end());
450 if (!dictionary_change
.to_remove().empty()) {
451 WordSet updated_words
=
452 base::STLSetDifference
<WordSet
>(words_
,
453 dictionary_change
.to_remove());
454 std::swap(words_
, updated_words
);
// Schedules an update of the dictionary file. Must be called on the UI thread.
// Posts a task bound to UpdateDictionaryFile() with |custom_dictionary_path_|
// as the last bound argument.
// NOTE(review): the leading arguments of PostTask() and one bound argument are
// not visible in this excerpt. UpdateDictionaryFile() asserts the file thread
// and takes a Change before the path, so presumably the task is posted to the
// file thread with |dictionary_change| bound; confirm.
458 void SpellcheckCustomDictionary::Save(
459 const SpellcheckCustomDictionary::Change
& dictionary_change
) {
460 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI
));
461 BrowserThread::PostTask(
464 base::Bind(&SpellcheckCustomDictionary::UpdateDictionaryFile
,
466 custom_dictionary_path_
));
// Sends |dictionary_change| to the sync processor. Must be called on the UI
// thread. Words to add are queued as ACTION_ADD changes, limited by
// |upload_size|; words to remove are queued as ACTION_DELETE changes without
// a limit. The whole list is handed to |sync_processor_| in one call and its
// result is stored in |error|. Syncing is stopped when the local dictionary
// holds more than MAX_SYNCABLE_DICTIONARY_WORDS words.
// NOTE(review): several lines are not visible in this excerpt: the statement
// guarded by the first `if`, parts of the |max_upload_size| and |upload_size|
// expressions, the declaration and increment of |i|, whatever follows the
// ProcessSyncChanges() call, and the final return. Confirm against the
// complete file before relying on the details below.
469 syncer::SyncError
SpellcheckCustomDictionary::Sync(
470 const SpellcheckCustomDictionary::Change
& dictionary_change
) {
471 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI
));
472 syncer::SyncError error
;
// Guard for "not syncing" or "nothing to send"; the guarded statement is not
// visible -- presumably an early return of |error|.
473 if (!IsSyncing() || dictionary_change
.empty())
476 // The number of words on the sync server should not exceed the limits.
// |server_size| is the local word count minus the number of words this change
// adds. This assumes the change was already applied to |words_| -- TODO
// confirm with the callers.
477 int server_size
= static_cast<int>(words_
.size()) -
478 static_cast<int>(dictionary_change
.to_add().size());
479 int max_upload_size
= std::max(
482 chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS
) -
484 int upload_size
= std::min(
485 static_cast<int>(dictionary_change
.to_add().size()),
488 syncer::SyncChangeList sync_change_list
;
// Queue additions; the loop also stops once |i| reaches |upload_size|.
491 for (WordList::const_iterator it
= dictionary_change
.to_add().begin();
492 it
!= dictionary_change
.to_add().end() && i
< upload_size
;
494 std::string word
= *it
;
495 sync_pb::EntitySpecifics specifics
;
496 specifics
.mutable_dictionary()->set_word(word
);
497 sync_change_list
.push_back(syncer::SyncChange(
499 syncer::SyncChange::ACTION_ADD
,
500 syncer::SyncData::CreateLocalData(word
, word
, specifics
)));
// Queue removals; every word to remove is sent.
503 for (WordList::const_iterator it
= dictionary_change
.to_remove().begin();
504 it
!= dictionary_change
.to_remove().end();
506 std::string word
= *it
;
507 sync_pb::EntitySpecifics specifics
;
508 specifics
.mutable_dictionary()->set_word(word
);
509 sync_change_list
.push_back(syncer::SyncChange(
511 syncer::SyncChange::ACTION_DELETE
,
512 syncer::SyncData::CreateLocalData(word
, word
, specifics
)));
515 // Send the changes to the sync processor.
516 error
= sync_processor_
->ProcessSyncChanges(FROM_HERE
, sync_change_list
);
520 // Turn off syncing of this dictionary if the server already has the maximum
// The check below compares the local word count, not a server-side count.
522 if (words_
.size() > chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS
)
523 StopSyncing(syncer::DICTIONARY
);
528 void SpellcheckCustomDictionary::Notify(
529 const SpellcheckCustomDictionary::Change
& dictionary_change
) {
530 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI
));
531 if (!IsLoaded() || dictionary_change
.empty())
533 FOR_EACH_OBSERVER(Observer
,
535 OnCustomDictionaryChanged(dictionary_change
));