1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/spellchecker/spellcheck_custom_dictionary.h"
9 #include "base/files/file_util.h"
10 #include "base/files/important_file_writer.h"
12 #include "base/strings/string_number_conversions.h"
13 #include "base/strings/string_split.h"
14 #include "chrome/browser/spellchecker/spellcheck_host_metrics.h"
15 #include "chrome/common/chrome_constants.h"
16 #include "chrome/common/spellcheck_common.h"
17 #include "chrome/common/spellcheck_messages.h"
18 #include "content/public/browser/browser_thread.h"
19 #include "sync/api/sync_change.h"
20 #include "sync/api/sync_error_factory.h"
21 #include "sync/protocol/sync.pb.h"
23 using content::BrowserThread
;
27 // Filename extension for backup dictionary file.
28 const base::FilePath::CharType BACKUP_EXTENSION
[] = FILE_PATH_LITERAL("backup");
30 // Prefix for the checksum in the dictionary file.
31 const char CHECKSUM_PREFIX
[] = "checksum_v1 = ";
// The status of the checksum in a custom spellcheck dictionary.
enum ChecksumStatus {
  VALID_CHECKSUM,
  INVALID_CHECKSUM,
};
// The result of a dictionary sanitation. The non-zero values are distinct bits
// so that several of them can be OR-ed together into one bitmap.
enum ChangeSanitationResult {
  // The change is valid and can be applied as-is.
  VALID_CHANGE = 0,

  // The change contained words to be added that are not valid.
  DETECTED_INVALID_WORDS = 1,

  // The change contained words to be added that are already in the dictionary.
  DETECTED_DUPLICATE_WORDS = 2,

  // The change contained words to be removed that are not in the dictionary.
  DETECTED_MISSING_WORDS = 4,
};
54 // Loads the file at |file_path| into the |words| container. If the file has a
55 // valid checksum, then returns ChecksumStatus::VALID. If the file has an
56 // invalid checksum, then returns ChecksumStatus::INVALID and clears |words|.
57 ChecksumStatus
LoadFile(const base::FilePath
& file_path
,
58 std::set
<std::string
>* words
) {
59 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
63 base::ReadFileToString(file_path
, &contents
);
64 size_t pos
= contents
.rfind(CHECKSUM_PREFIX
);
65 if (pos
!= std::string::npos
) {
66 std::string checksum
= contents
.substr(pos
+ strlen(CHECKSUM_PREFIX
));
67 contents
= contents
.substr(0, pos
);
68 if (checksum
!= base::MD5String(contents
))
69 return INVALID_CHECKSUM
;
71 base::TrimWhitespaceASCII(contents
, base::TRIM_ALL
, &contents
);
72 std::vector
<std::string
> word_list
;
73 base::SplitString(contents
, '\n', &word_list
);
74 words
->insert(word_list
.begin(), word_list
.end());
75 return VALID_CHECKSUM
;
78 // Returns true for valid custom dictionary words.
79 bool IsValidWord(const std::string
& word
) {
81 return !word
.empty() &&
83 chrome::spellcheck_common::MAX_CUSTOM_DICTIONARY_WORD_BYTES
&&
84 base::IsStringUTF8(word
) &&
86 base::TrimWhitespaceASCII(word
, base::TRIM_ALL
, &tmp
);
89 // Loads the custom spellcheck dictionary from |path| into |custom_words|. If
90 // the dictionary checksum is not valid, but backup checksum is valid, then
91 // restores the backup and loads that into |custom_words| instead. If the backup
92 // is invalid too, then clears |custom_words|. Must be called on the file
94 void LoadDictionaryFileReliably(const base::FilePath
& path
,
95 std::set
<std::string
>* custom_words
) {
96 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
98 // Load the contents and verify the checksum.
99 if (LoadFile(path
, custom_words
) == VALID_CHECKSUM
)
101 // Checksum is not valid. See if there's a backup.
102 base::FilePath backup
= path
.AddExtension(BACKUP_EXTENSION
);
103 if (!base::PathExists(backup
))
105 // Load the backup and verify its checksum.
106 if (LoadFile(backup
, custom_words
) != VALID_CHECKSUM
)
108 // Backup checksum is valid. Restore the backup.
109 base::CopyFile(backup
, path
);
112 // Backs up the original dictionary, saves |custom_words| and its checksum into
113 // the custom spellcheck dictionary at |path|.
114 void SaveDictionaryFileReliably(const base::FilePath
& path
,
115 const std::set
<std::string
>& custom_words
) {
116 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
117 std::stringstream content
;
118 for (const std::string
& word
: custom_words
)
119 content
<< word
<< '\n';
121 std::string checksum
= base::MD5String(content
.str());
122 content
<< CHECKSUM_PREFIX
<< checksum
;
123 base::CopyFile(path
, path
.AddExtension(BACKUP_EXTENSION
));
124 base::ImportantFileWriter::WriteFileAtomically(path
, content
.str());
127 // Removes duplicate and invalid words from |to_add| word list. Looks for
128 // duplicates in both |to_add| and |existing| word lists. Returns a bitmap of
129 // |ChangeSanitationResult| values.
130 int SanitizeWordsToAdd(const std::set
<std::string
>& existing
,
131 std::set
<std::string
>* to_add
) {
133 // Do not add duplicate words.
134 std::set
<std::string
> new_words
=
135 base::STLSetDifference
<std::set
<std::string
>>(*to_add
, existing
);
136 int result
= VALID_CHANGE
;
137 if (to_add
->size() != new_words
.size())
138 result
|= DETECTED_DUPLICATE_WORDS
;
139 // Do not add invalid words.
140 std::set
<std::string
> valid_new_words
;
141 for (const std::string
& word
: new_words
) {
142 if (IsValidWord(word
))
143 valid_new_words
.insert(valid_new_words
.end(), word
);
145 if (valid_new_words
.size() != new_words
.size())
146 result
|= DETECTED_INVALID_WORDS
;
147 // Save the sanitized words to be added.
148 std::swap(*to_add
, valid_new_words
);
152 // Removes word from |to_remove| that are missing from |existing| word list and
153 // sorts |to_remove|. Returns a bitmap of |ChangeSanitationResult| values.
154 int SanitizeWordsToRemove(const std::set
<std::string
>& existing
,
155 std::set
<std::string
>* to_remove
) {
157 // Do not remove words that are missing from the dictionary.
158 std::set
<std::string
> found_words
=
159 base::STLSetIntersection
<std::set
<std::string
>>(existing
, *to_remove
);
160 int result
= VALID_CHANGE
;
161 if (to_remove
->size() > found_words
.size())
162 result
|= DETECTED_MISSING_WORDS
;
163 // Save the sanitized words to be removed.
164 std::swap(*to_remove
, found_words
);
170 SpellcheckCustomDictionary::Change::Change() {
173 SpellcheckCustomDictionary::Change::~Change() {
176 void SpellcheckCustomDictionary::Change::AddWord(const std::string
& word
) {
177 to_add_
.insert(word
);
180 void SpellcheckCustomDictionary::Change::AddWords(
181 const std::set
<std::string
>& words
) {
182 to_add_
.insert(words
.begin(), words
.end());
185 void SpellcheckCustomDictionary::Change::RemoveWord(const std::string
& word
) {
186 to_remove_
.insert(word
);
189 int SpellcheckCustomDictionary::Change::Sanitize(
190 const std::set
<std::string
>& words
) {
191 int result
= VALID_CHANGE
;
192 if (!to_add_
.empty())
193 result
|= SanitizeWordsToAdd(words
, &to_add_
);
194 if (!to_remove_
.empty())
195 result
|= SanitizeWordsToRemove(words
, &to_remove_
);
199 SpellcheckCustomDictionary::SpellcheckCustomDictionary(
200 const base::FilePath
& dictionary_directory_name
)
201 : custom_dictionary_path_(
202 dictionary_directory_name
.Append(chrome::kCustomDictionaryFileName
)),
204 weak_ptr_factory_(this) {
207 SpellcheckCustomDictionary::~SpellcheckCustomDictionary() {
210 const std::set
<std::string
>& SpellcheckCustomDictionary::GetWords() const {
211 DCHECK_CURRENTLY_ON(BrowserThread::UI
);
215 bool SpellcheckCustomDictionary::AddWord(const std::string
& word
) {
216 DCHECK_CURRENTLY_ON(BrowserThread::UI
);
217 scoped_ptr
<Change
> dictionary_change(new Change
);
218 dictionary_change
->AddWord(word
);
219 int result
= dictionary_change
->Sanitize(GetWords());
220 Apply(*dictionary_change
);
221 Notify(*dictionary_change
);
222 Sync(*dictionary_change
);
223 Save(dictionary_change
.Pass());
224 return result
== VALID_CHANGE
;
227 bool SpellcheckCustomDictionary::RemoveWord(const std::string
& word
) {
228 DCHECK_CURRENTLY_ON(BrowserThread::UI
);
229 scoped_ptr
<Change
> dictionary_change(new Change
);
230 dictionary_change
->RemoveWord(word
);
231 int result
= dictionary_change
->Sanitize(GetWords());
232 Apply(*dictionary_change
);
233 Notify(*dictionary_change
);
234 Sync(*dictionary_change
);
235 Save(dictionary_change
.Pass());
236 return result
== VALID_CHANGE
;
239 bool SpellcheckCustomDictionary::HasWord(const std::string
& word
) const {
240 return !!words_
.count(word
);
243 void SpellcheckCustomDictionary::AddObserver(Observer
* observer
) {
244 DCHECK_CURRENTLY_ON(BrowserThread::UI
);
246 observers_
.AddObserver(observer
);
249 void SpellcheckCustomDictionary::RemoveObserver(Observer
* observer
) {
250 DCHECK_CURRENTLY_ON(BrowserThread::UI
);
252 observers_
.RemoveObserver(observer
);
255 bool SpellcheckCustomDictionary::IsLoaded() {
256 DCHECK_CURRENTLY_ON(BrowserThread::UI
);
260 bool SpellcheckCustomDictionary::IsSyncing() {
261 DCHECK_CURRENTLY_ON(BrowserThread::UI
);
262 return !!sync_processor_
.get();
265 void SpellcheckCustomDictionary::Load() {
266 DCHECK_CURRENTLY_ON(BrowserThread::UI
);
267 BrowserThread::PostTaskAndReplyWithResult(
270 base::Bind(&SpellcheckCustomDictionary::LoadDictionaryFile
,
271 custom_dictionary_path_
),
272 base::Bind(&SpellcheckCustomDictionary::OnLoaded
,
273 weak_ptr_factory_
.GetWeakPtr()));
276 syncer::SyncMergeResult
SpellcheckCustomDictionary::MergeDataAndStartSyncing(
277 syncer::ModelType type
,
278 const syncer::SyncDataList
& initial_sync_data
,
279 scoped_ptr
<syncer::SyncChangeProcessor
> sync_processor
,
280 scoped_ptr
<syncer::SyncErrorFactory
> sync_error_handler
) {
281 DCHECK_CURRENTLY_ON(BrowserThread::UI
);
282 DCHECK(!sync_processor_
.get());
283 DCHECK(!sync_error_handler_
.get());
284 DCHECK(sync_processor
.get());
285 DCHECK(sync_error_handler
.get());
286 DCHECK_EQ(syncer::DICTIONARY
, type
);
287 sync_processor_
= sync_processor
.Pass();
288 sync_error_handler_
= sync_error_handler
.Pass();
290 // Build a list of words to add locally.
291 scoped_ptr
<Change
> to_change_locally(new Change
);
292 for (const syncer::SyncData
& data
: initial_sync_data
) {
293 DCHECK_EQ(syncer::DICTIONARY
, data
.GetDataType());
294 to_change_locally
->AddWord(data
.GetSpecifics().dictionary().word());
297 // Add as many as possible local words remotely.
298 to_change_locally
->Sanitize(GetWords());
299 Change to_change_remotely
;
300 to_change_remotely
.AddWords(base::STLSetDifference
<std::set
<std::string
>>(
301 words_
, to_change_locally
->to_add()));
303 // Add remote words locally.
304 Apply(*to_change_locally
);
305 Notify(*to_change_locally
);
306 Save(to_change_locally
.Pass());
308 // Send local changes to the sync server.
309 syncer::SyncMergeResult
result(type
);
310 result
.set_error(Sync(to_change_remotely
));
314 void SpellcheckCustomDictionary::StopSyncing(syncer::ModelType type
) {
315 DCHECK_CURRENTLY_ON(BrowserThread::UI
);
316 DCHECK_EQ(syncer::DICTIONARY
, type
);
317 sync_processor_
.reset();
318 sync_error_handler_
.reset();
321 syncer::SyncDataList
SpellcheckCustomDictionary::GetAllSyncData(
322 syncer::ModelType type
) const {
323 DCHECK_CURRENTLY_ON(BrowserThread::UI
);
324 DCHECK_EQ(syncer::DICTIONARY
, type
);
325 syncer::SyncDataList data
;
328 for (auto it
= words_
.begin();
329 it
!= words_
.end() &&
330 i
< chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS
;
333 sync_pb::EntitySpecifics specifics
;
334 specifics
.mutable_dictionary()->set_word(word
);
335 data
.push_back(syncer::SyncData::CreateLocalData(word
, word
, specifics
));
340 syncer::SyncError
SpellcheckCustomDictionary::ProcessSyncChanges(
341 const tracked_objects::Location
& from_here
,
342 const syncer::SyncChangeList
& change_list
) {
343 DCHECK_CURRENTLY_ON(BrowserThread::UI
);
344 scoped_ptr
<Change
> dictionary_change(new Change
);
345 for (const syncer::SyncChange
& change
: change_list
) {
346 DCHECK(change
.IsValid());
347 const std::string
& word
=
348 change
.sync_data().GetSpecifics().dictionary().word();
349 switch (change
.change_type()) {
350 case syncer::SyncChange::ACTION_ADD
:
351 dictionary_change
->AddWord(word
);
353 case syncer::SyncChange::ACTION_DELETE
:
354 dictionary_change
->RemoveWord(word
);
356 case syncer::SyncChange::ACTION_UPDATE
:
357 // Intentionally fall through.
358 case syncer::SyncChange::ACTION_INVALID
:
359 return sync_error_handler_
->CreateAndUploadError(
361 "Processing sync changes failed on change type " +
362 syncer::SyncChange::ChangeTypeToString(change
.change_type()));
366 dictionary_change
->Sanitize(GetWords());
367 Apply(*dictionary_change
);
368 Notify(*dictionary_change
);
369 Save(dictionary_change
.Pass());
371 return syncer::SyncError();
375 scoped_ptr
<std::set
<std::string
>>
376 SpellcheckCustomDictionary::LoadDictionaryFile(const base::FilePath
& path
) {
377 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
378 scoped_ptr
<std::set
<std::string
>> words(new std::set
<std::string
>);
379 LoadDictionaryFileReliably(path
, words
.get());
380 if (!words
->empty() &&
382 SanitizeWordsToAdd(std::set
<std::string
>(), words
.get())) {
383 SaveDictionaryFileReliably(path
, *words
);
385 SpellCheckHostMetrics::RecordCustomWordCountStats(words
->size());
390 void SpellcheckCustomDictionary::UpdateDictionaryFile(
391 scoped_ptr
<Change
> dictionary_change
,
392 const base::FilePath
& path
) {
393 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
394 DCHECK(dictionary_change
);
396 if (dictionary_change
->empty())
399 std::set
<std::string
> custom_words
;
400 LoadDictionaryFileReliably(path
, &custom_words
);
403 custom_words
.insert(dictionary_change
->to_add().begin(),
404 dictionary_change
->to_add().end());
406 // Remove words and save the remainder.
407 SaveDictionaryFileReliably(path
,
408 base::STLSetDifference
<std::set
<std::string
>>(
409 custom_words
, dictionary_change
->to_remove()));
412 void SpellcheckCustomDictionary::OnLoaded(
413 scoped_ptr
<std::set
<std::string
>> custom_words
) {
414 DCHECK_CURRENTLY_ON(BrowserThread::UI
);
415 DCHECK(custom_words
);
416 Change dictionary_change
;
417 dictionary_change
.AddWords(*custom_words
);
418 dictionary_change
.Sanitize(GetWords());
419 Apply(dictionary_change
);
420 Sync(dictionary_change
);
422 FOR_EACH_OBSERVER(Observer
, observers_
, OnCustomDictionaryLoaded());
425 void SpellcheckCustomDictionary::Apply(const Change
& dictionary_change
) {
426 DCHECK_CURRENTLY_ON(BrowserThread::UI
);
427 if (!dictionary_change
.to_add().empty()) {
428 words_
.insert(dictionary_change
.to_add().begin(),
429 dictionary_change
.to_add().end());
431 if (!dictionary_change
.to_remove().empty()) {
432 std::set
<std::string
> updated_words
=
433 base::STLSetDifference
<std::set
<std::string
>>(
434 words_
, dictionary_change
.to_remove());
435 std::swap(words_
, updated_words
);
439 void SpellcheckCustomDictionary::Save(scoped_ptr
<Change
> dictionary_change
) {
440 DCHECK_CURRENTLY_ON(BrowserThread::UI
);
441 BrowserThread::PostTask(
442 BrowserThread::FILE, FROM_HERE
,
443 base::Bind(&SpellcheckCustomDictionary::UpdateDictionaryFile
,
444 base::Passed(&dictionary_change
), custom_dictionary_path_
));
447 syncer::SyncError
SpellcheckCustomDictionary::Sync(
448 const Change
& dictionary_change
) {
449 DCHECK_CURRENTLY_ON(BrowserThread::UI
);
450 syncer::SyncError error
;
451 if (!IsSyncing() || dictionary_change
.empty())
454 // The number of words on the sync server should not exceed the limits.
455 int server_size
= static_cast<int>(words_
.size()) -
456 static_cast<int>(dictionary_change
.to_add().size());
457 int max_upload_size
= std::max(
460 chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS
) -
462 int upload_size
= std::min(
463 static_cast<int>(dictionary_change
.to_add().size()),
466 syncer::SyncChangeList sync_change_list
;
469 for (auto it
= dictionary_change
.to_add().begin();
470 it
!= dictionary_change
.to_add().end() && i
< upload_size
; ++it
, ++i
) {
471 const std::string
& word
= *it
;
472 sync_pb::EntitySpecifics specifics
;
473 specifics
.mutable_dictionary()->set_word(word
);
474 sync_change_list
.push_back(syncer::SyncChange(
475 FROM_HERE
, syncer::SyncChange::ACTION_ADD
,
476 syncer::SyncData::CreateLocalData(word
, word
, specifics
)));
479 for (const std::string
& word
: dictionary_change
.to_remove()) {
480 sync_pb::EntitySpecifics specifics
;
481 specifics
.mutable_dictionary()->set_word(word
);
482 sync_change_list
.push_back(syncer::SyncChange(
484 syncer::SyncChange::ACTION_DELETE
,
485 syncer::SyncData::CreateLocalData(word
, word
, specifics
)));
488 // Send the changes to the sync processor.
489 error
= sync_processor_
->ProcessSyncChanges(FROM_HERE
, sync_change_list
);
493 // Turn off syncing of this dictionary if the server already has the maximum
495 if (words_
.size() > chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS
)
496 StopSyncing(syncer::DICTIONARY
);
501 void SpellcheckCustomDictionary::Notify(const Change
& dictionary_change
) {
502 DCHECK_CURRENTLY_ON(BrowserThread::UI
);
503 if (!IsLoaded() || dictionary_change
.empty())
505 FOR_EACH_OBSERVER(Observer
,
507 OnCustomDictionaryChanged(dictionary_change
));