1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/spellchecker/spellcheck_custom_dictionary.h"
9 #include "base/files/file_util.h"
10 #include "base/files/important_file_writer.h"
12 #include "base/strings/string_number_conversions.h"
13 #include "base/strings/string_split.h"
14 #include "base/strings/string_util.h"
15 #include "chrome/browser/spellchecker/spellcheck_host_metrics.h"
16 #include "chrome/common/chrome_constants.h"
17 #include "chrome/common/spellcheck_common.h"
18 #include "content/public/browser/browser_thread.h"
19 #include "sync/api/sync_change.h"
20 #include "sync/api/sync_error_factory.h"
21 #include "sync/protocol/sync.pb.h"
23 using content::BrowserThread
;
27 // Filename extension for backup dictionary file.
28 const base::FilePath::CharType BACKUP_EXTENSION
[] = FILE_PATH_LITERAL("backup");
// Marker that introduces the checksum stored at the end of the dictionary
// file; the text following it is the checksum itself.
const char CHECKSUM_PREFIX[] = "checksum_v1 = ";
33 // The status of the checksum in a custom spellcheck dictionary.
39 // The result of a dictionary sanitation. Can be used as a bitmap.
// Each visible enumerator is a distinct power of two, so several results can
// be OR-ed into one int (the sanitizing helpers in this file do this with
// |=).
// NOTE(review): the enumerator described by the first comment below ("valid
// and can be applied as-is") and the closing brace are not visible in this
// extract. VALID_CHANGE is referenced elsewhere in the file -- confirm its
// declaration and value against the full file.
40 enum ChangeSanitationResult
{
41 // The change is valid and can be applied as-is.
44 // The change contained words to be added that are not valid.
45 DETECTED_INVALID_WORDS
= 1,
47 // The change contained words to be added that are already in the dictionary.
48 DETECTED_DUPLICATE_WORDS
= 2,
50 // The change contained words to be removed that are not in the dictionary.
51 DETECTED_MISSING_WORDS
= 4,
54 // Loads the file at |file_path| into the |words| container. If the file has a
55 // valid checksum, then returns VALID_CHECKSUM. If the file has an
56 // invalid checksum, then returns INVALID_CHECKSUM and clears |words|.
// Reads |file_path| and, when the contents contain CHECKSUM_PREFIX, compares
// the text after the last occurrence of the prefix with the MD5 of the text
// before it. Returns INVALID_CHECKSUM on a mismatch; otherwise inserts the
// newline-separated words into |words| and returns VALID_CHECKSUM. Contents
// without a checksum prefix skip the comparison entirely.
// NOTE(review): the declaration of |contents|, any preparation of |words|,
// and the closing braces are not visible in this extract -- confirm against
// the full file.
57 ChecksumStatus
LoadFile(const base::FilePath
& file_path
,
58 std::set
<std::string
>* words
) {
59 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
// The return value of the read is not inspected here.
63 base::ReadFileToString(file_path
, &contents
);
// rfind: use the last occurrence of the prefix in the file.
64 size_t pos
= contents
.rfind(CHECKSUM_PREFIX
);
65 if (pos
!= std::string::npos
) {
// Everything after the prefix is taken as the stored checksum.
66 std::string checksum
= contents
.substr(pos
+ strlen(CHECKSUM_PREFIX
));
// Drop the prefix and checksum; the hash is computed over what remains.
67 contents
= contents
.substr(0, pos
);
68 if (checksum
!= base::MD5String(contents
))
69 return INVALID_CHECKSUM
;
// Trim the whole contents, split on newlines, and trim each piece.
72 std::vector
<std::string
> word_list
= base::SplitString(
73 base::TrimWhitespaceASCII(contents
, base::TRIM_ALL
), "\n",
74 base::TRIM_WHITESPACE
, base::SPLIT_WANT_ALL
);
75 words
->insert(word_list
.begin(), word_list
.end());
76 return VALID_CHECKSUM
;
79 // Returns true for valid custom dictionary words.
// The result is a conjunction: the word must be non-empty and valid UTF-8,
// and it is additionally checked against MAX_CUSTOM_DICTIONARY_WORD_BYTES and
// passed through TrimWhitespaceASCII with TRIM_ALL.
// NOTE(review): the left-hand operands of the size and trim comparisons and
// the declaration of |tmp| are not visible in this extract -- presumably a
// byte-length limit and a "nothing was trimmed" check; confirm against the
// full file.
80 bool IsValidWord(const std::string
& word
) {
82 return !word
.empty() &&
84 chrome::spellcheck_common::MAX_CUSTOM_DICTIONARY_WORD_BYTES
&&
85 base::IsStringUTF8(word
) &&
87 base::TrimWhitespaceASCII(word
, base::TRIM_ALL
, &tmp
);
90 // Loads the custom spellcheck dictionary from |path| into |custom_words|. If
91 // the dictionary checksum is not valid, but backup checksum is valid, then
92 // restores the backup and loads that into |custom_words| instead. If the backup
93 // is invalid too, then clears |custom_words|. Must be called on the file thread.
// Tries the dictionary at |path| first and then the sibling file obtained by
// adding BACKUP_EXTENSION to |path|. When the backup is the one that loads
// with a valid checksum, it is copied over |path|.
// NOTE(review): the statements controlled by the three `if` conditions below
// are not visible in this extract (presumably early returns) -- confirm
// against the full file.
95 void LoadDictionaryFileReliably(const base::FilePath
& path
,
96 std::set
<std::string
>* custom_words
) {
97 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
99 // Load the contents and verify the checksum.
100 if (LoadFile(path
, custom_words
) == VALID_CHECKSUM
)
102 // Checksum is not valid. See if there's a backup.
103 base::FilePath backup
= path
.AddExtension(BACKUP_EXTENSION
);
104 if (!base::PathExists(backup
))
106 // Load the backup and verify its checksum.
107 if (LoadFile(backup
, custom_words
) != VALID_CHECKSUM
)
109 // Backup checksum is valid. Restore the backup.
// The result of the copy is not inspected here.
110 base::CopyFile(backup
, path
);
113 // Backs up the original dictionary, saves |custom_words| and its checksum into
114 // the custom spellcheck dictionary at |path|.
115 void SaveDictionaryFileReliably(const base::FilePath
& path
,
116 const std::set
<std::string
>& custom_words
) {
117 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
118 std::stringstream content
;
119 for (const std::string
& word
: custom_words
)
120 content
<< word
<< '\n';
122 std::string checksum
= base::MD5String(content
.str());
123 content
<< CHECKSUM_PREFIX
<< checksum
;
124 base::CopyFile(path
, path
.AddExtension(BACKUP_EXTENSION
));
125 base::ImportantFileWriter::WriteFileAtomically(path
, content
.str());
128 // Removes duplicate and invalid words from |to_add| word list. Looks for
129 // duplicates in both |to_add| and |existing| word lists. Returns a bitmap of
130 // |ChangeSanitationResult| values.
// Two filtering passes over |to_add|: first drop every word that is already
// in |existing|, then drop every word rejected by IsValidWord(). A flag is
// OR-ed into |result| whenever a pass shrinks the set, and |to_add| is
// replaced by the surviving words.
// NOTE(review): the return statement and closing braces are not visible in
// this extract; presumably |result| is returned -- confirm.
131 int SanitizeWordsToAdd(const std::set
<std::string
>& existing
,
132 std::set
<std::string
>* to_add
) {
134 // Do not add duplicate words.
135 std::set
<std::string
> new_words
=
136 base::STLSetDifference
<std::set
<std::string
>>(*to_add
, existing
);
137 int result
= VALID_CHANGE
;
// A smaller difference set means some requested words already existed.
138 if (to_add
->size() != new_words
.size())
139 result
|= DETECTED_DUPLICATE_WORDS
;
140 // Do not add invalid words.
141 std::set
<std::string
> valid_new_words
;
142 for (const std::string
& word
: new_words
) {
143 if (IsValidWord(word
))
// |new_words| is iterated in sorted order, so end() is the right insert hint.
144 valid_new_words
.insert(valid_new_words
.end(), word
);
146 if (valid_new_words
.size() != new_words
.size())
147 result
|= DETECTED_INVALID_WORDS
;
148 // Save the sanitized words to be added.
149 std::swap(*to_add
, valid_new_words
);
153 // Removes words from |to_remove| that are missing from |existing| word list and
154 // sorts |to_remove|. Returns a bitmap of |ChangeSanitationResult| values.
// Restricts |to_remove| to the words that are actually present in |existing|
// (set intersection) and flags DETECTED_MISSING_WORDS when that drops any.
// NOTE(review): the return statement and closing brace are not visible in
// this extract; presumably |result| is returned -- confirm.
155 int SanitizeWordsToRemove(const std::set
<std::string
>& existing
,
156 std::set
<std::string
>* to_remove
) {
158 // Do not remove words that are missing from the dictionary.
159 std::set
<std::string
> found_words
=
160 base::STLSetIntersection
<std::set
<std::string
>>(existing
, *to_remove
);
161 int result
= VALID_CHANGE
;
// The intersection cannot be larger than |to_remove|; smaller means some
// requested removals were not in |existing|.
162 if (to_remove
->size() > found_words
.size())
163 result
|= DETECTED_MISSING_WORDS
;
164 // Save the sanitized words to be removed.
165 std::swap(*to_remove
, found_words
);
171 SpellcheckCustomDictionary::Change::Change() {
174 SpellcheckCustomDictionary::Change::~Change() {
177 void SpellcheckCustomDictionary::Change::AddWord(const std::string
& word
) {
178 to_add_
.insert(word
);
181 void SpellcheckCustomDictionary::Change::AddWords(
182 const std::set
<std::string
>& words
) {
183 to_add_
.insert(words
.begin(), words
.end());
186 void SpellcheckCustomDictionary::Change::RemoveWord(const std::string
& word
) {
187 to_remove_
.insert(word
);
// Sanitizes both halves of this change against |words|: additions through
// SanitizeWordsToAdd() and removals through SanitizeWordsToRemove(). The
// bitmaps from the helpers are OR-ed together; an empty half is skipped.
// NOTE(review): the return statement and closing brace are not visible in
// this extract; presumably |result| is returned -- confirm.
190 int SpellcheckCustomDictionary::Change::Sanitize(
191 const std::set
<std::string
>& words
) {
192 int result
= VALID_CHANGE
;
193 if (!to_add_
.empty())
194 result
|= SanitizeWordsToAdd(words
, &to_add_
);
195 if (!to_remove_
.empty())
196 result
|= SanitizeWordsToRemove(words
, &to_remove_
);
// Derives the dictionary file path by appending
// chrome::kCustomDictionaryFileName to |dictionary_directory_name|, and
// constructs the weak pointer factory with this object.
// NOTE(review): a line between the two visible initializers and the closing
// brace are not visible in this extract -- confirm against the full file.
200 SpellcheckCustomDictionary::SpellcheckCustomDictionary(
201 const base::FilePath
& dictionary_directory_name
)
202 : custom_dictionary_path_(
203 dictionary_directory_name
.Append(chrome::kCustomDictionaryFileName
)),
205 weak_ptr_factory_(this) {
208 SpellcheckCustomDictionary::~SpellcheckCustomDictionary() {
// Accessor returning a const reference to a set of words; asserts that it is
// called on the UI thread.
// NOTE(review): the return statement is not visible in this extract;
// presumably it returns |words_| -- confirm.
211 const std::set
<std::string
>& SpellcheckCustomDictionary::GetWords() const {
212 DCHECK_CURRENTLY_ON(BrowserThread::UI
);
216 bool SpellcheckCustomDictionary::AddWord(const std::string
& word
) {
217 DCHECK_CURRENTLY_ON(BrowserThread::UI
);
218 scoped_ptr
<Change
> dictionary_change(new Change
);
219 dictionary_change
->AddWord(word
);
220 int result
= dictionary_change
->Sanitize(GetWords());
221 Apply(*dictionary_change
);
222 Notify(*dictionary_change
);
223 Sync(*dictionary_change
);
224 Save(dictionary_change
.Pass());
225 return result
== VALID_CHANGE
;
228 bool SpellcheckCustomDictionary::RemoveWord(const std::string
& word
) {
229 DCHECK_CURRENTLY_ON(BrowserThread::UI
);
230 scoped_ptr
<Change
> dictionary_change(new Change
);
231 dictionary_change
->RemoveWord(word
);
232 int result
= dictionary_change
->Sanitize(GetWords());
233 Apply(*dictionary_change
);
234 Notify(*dictionary_change
);
235 Sync(*dictionary_change
);
236 Save(dictionary_change
.Pass());
237 return result
== VALID_CHANGE
;
240 bool SpellcheckCustomDictionary::HasWord(const std::string
& word
) const {
241 return !!words_
.count(word
);
// Registers |observer| in |observers_|; asserts it is called on the UI
// thread.
// NOTE(review): a line between the thread check and the registration is not
// visible in this extract -- confirm against the full file.
244 void SpellcheckCustomDictionary::AddObserver(Observer
* observer
) {
245 DCHECK_CURRENTLY_ON(BrowserThread::UI
);
247 observers_
.AddObserver(observer
);
// Unregisters |observer| from |observers_|; asserts it is called on the UI
// thread.
// NOTE(review): a line between the thread check and the removal is not
// visible in this extract -- confirm against the full file.
250 void SpellcheckCustomDictionary::RemoveObserver(Observer
* observer
) {
251 DCHECK_CURRENTLY_ON(BrowserThread::UI
);
253 observers_
.RemoveObserver(observer
);
// Reports whether the dictionary has been loaded; asserts it is called on the
// UI thread.
// NOTE(review): the return statement is not visible in this extract, so the
// state it reads cannot be confirmed from here.
256 bool SpellcheckCustomDictionary::IsLoaded() {
257 DCHECK_CURRENTLY_ON(BrowserThread::UI
);
261 bool SpellcheckCustomDictionary::IsSyncing() {
262 DCHECK_CURRENTLY_ON(BrowserThread::UI
);
263 return !!sync_processor_
.get();
// Starts an asynchronous load: LoadDictionaryFile() is bound to
// |custom_dictionary_path_| as the task, and OnLoaded() bound to a weak
// pointer to this object receives its result as the reply.
// NOTE(review): the first arguments of PostTaskAndReplyWithResult (target
// thread and posting location) are not visible in this extract;
// LoadDictionaryFile() itself asserts it runs on BrowserThread::FILE.
266 void SpellcheckCustomDictionary::Load() {
267 DCHECK_CURRENTLY_ON(BrowserThread::UI
);
268 BrowserThread::PostTaskAndReplyWithResult(
271 base::Bind(&SpellcheckCustomDictionary::LoadDictionaryFile
,
272 custom_dictionary_path_
),
273 base::Bind(&SpellcheckCustomDictionary::OnLoaded
,
274 weak_ptr_factory_
.GetWeakPtr()));
// Starts syncing the dictionary. Takes ownership of |sync_processor| and
// |sync_error_handler|, merges the words in |initial_sync_data| into the
// local dictionary, and sends the local-only words to the sync processor.
// The error from that upload is stored in the merge result.
// NOTE(review): the closing brace of the first loop and the final return
// statement are not visible in this extract; presumably |result| is
// returned -- confirm.
277 syncer::SyncMergeResult
SpellcheckCustomDictionary::MergeDataAndStartSyncing(
278 syncer::ModelType type
,
279 const syncer::SyncDataList
& initial_sync_data
,
280 scoped_ptr
<syncer::SyncChangeProcessor
> sync_processor
,
281 scoped_ptr
<syncer::SyncErrorFactory
> sync_error_handler
) {
282 DCHECK_CURRENTLY_ON(BrowserThread::UI
);
// Syncing must not already be active, and both arguments must be non-null.
283 DCHECK(!sync_processor_
.get());
284 DCHECK(!sync_error_handler_
.get());
285 DCHECK(sync_processor
.get());
286 DCHECK(sync_error_handler
.get());
287 DCHECK_EQ(syncer::DICTIONARY
, type
);
288 sync_processor_
= sync_processor
.Pass();
289 sync_error_handler_
= sync_error_handler
.Pass();
291 // Build a list of words to add locally.
292 scoped_ptr
<Change
> to_change_locally(new Change
);
293 for (const syncer::SyncData
& data
: initial_sync_data
) {
294 DCHECK_EQ(syncer::DICTIONARY
, data
.GetDataType());
295 to_change_locally
->AddWord(data
.GetSpecifics().dictionary().word());
298 // Add as many as possible local words remotely.
// After sanitizing, to_add() holds only remote words that are new locally.
299 to_change_locally
->Sanitize(GetWords());
300 Change to_change_remotely
;
// Computed before Apply() below, while |words_| still holds only local words.
301 to_change_remotely
.AddWords(base::STLSetDifference
<std::set
<std::string
>>(
302 words_
, to_change_locally
->to_add()));
304 // Add remote words locally.
305 Apply(*to_change_locally
);
306 Notify(*to_change_locally
);
307 Save(to_change_locally
.Pass());
309 // Send local changes to the sync server.
310 syncer::SyncMergeResult
result(type
);
311 result
.set_error(Sync(to_change_remotely
));
315 void SpellcheckCustomDictionary::StopSyncing(syncer::ModelType type
) {
316 DCHECK_CURRENTLY_ON(BrowserThread::UI
);
317 DCHECK_EQ(syncer::DICTIONARY
, type
);
318 sync_processor_
.reset();
319 sync_error_handler_
.reset();
// Builds sync data entries from |words_|. The loop condition stops at the end
// of |words_| or once |i| reaches MAX_SYNCABLE_DICTIONARY_WORDS, whichever
// comes first. Each entry uses the word as both arguments preceding the
// specifics in CreateLocalData().
// NOTE(review): the declarations of |word| and |i|, the loop increment, the
// assignment to |word|, and the return statement are not visible in this
// extract -- confirm against the full file.
322 syncer::SyncDataList
SpellcheckCustomDictionary::GetAllSyncData(
323 syncer::ModelType type
) const {
324 DCHECK_CURRENTLY_ON(BrowserThread::UI
);
325 DCHECK_EQ(syncer::DICTIONARY
, type
);
326 syncer::SyncDataList data
;
329 for (auto it
= words_
.begin();
330 it
!= words_
.end() &&
331 i
< chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS
;
334 sync_pb::EntitySpecifics specifics
;
335 specifics
.mutable_dictionary()->set_word(word
);
336 data
.push_back(syncer::SyncData::CreateLocalData(word
, word
, specifics
));
// Applies changes received from sync to the local dictionary. ADD and DELETE
// entries are collected into one Change, which is then sanitized, applied,
// announced to observers and queued for saving. UPDATE and INVALID entries
// produce an error through |sync_error_handler_|. The collected change is not
// passed to Sync() here.
// NOTE(review): no `break` statements are visible after the ACTION_ADD and
// ACTION_DELETE cases in this extract. If they are genuinely absent, ADD
// would fall through into DELETE and then into the error return. The first
// argument of CreateAndUploadError and the closing braces of the switch and
// loop are also not visible -- confirm against the full file.
341 syncer::SyncError
SpellcheckCustomDictionary::ProcessSyncChanges(
342 const tracked_objects::Location
& from_here
,
343 const syncer::SyncChangeList
& change_list
) {
344 DCHECK_CURRENTLY_ON(BrowserThread::UI
);
345 scoped_ptr
<Change
> dictionary_change(new Change
);
346 for (const syncer::SyncChange
& change
: change_list
) {
347 DCHECK(change
.IsValid());
348 const std::string
& word
=
349 change
.sync_data().GetSpecifics().dictionary().word();
350 switch (change
.change_type()) {
351 case syncer::SyncChange::ACTION_ADD
:
352 dictionary_change
->AddWord(word
);
354 case syncer::SyncChange::ACTION_DELETE
:
355 dictionary_change
->RemoveWord(word
);
357 case syncer::SyncChange::ACTION_UPDATE
:
358 // Intentionally fall through.
359 case syncer::SyncChange::ACTION_INVALID
:
// Returns from inside the loop, before the collected change is applied.
360 return sync_error_handler_
->CreateAndUploadError(
362 "Processing sync changes failed on change type " +
363 syncer::SyncChange::ChangeTypeToString(change
.change_type()));
367 dictionary_change
->Sanitize(GetWords());
368 Apply(*dictionary_change
);
369 Notify(*dictionary_change
);
370 Save(dictionary_change
.Pass());
// A default-constructed SyncError is returned on the success path.
372 return syncer::SyncError();
// Loads the dictionary at |path| into a newly allocated set on the FILE
// thread, sanitizes the loaded words against an empty set, may rewrite the
// file, and records the word count for metrics.
// NOTE(review): part of the `if` condition (a line between its two visible
// operands) and the return statement are not visible in this extract.
// Presumably the file is rewritten only when sanitizing changed something and
// |words| is returned -- confirm against the full file.
376 scoped_ptr
<std::set
<std::string
>>
377 SpellcheckCustomDictionary::LoadDictionaryFile(const base::FilePath
& path
) {
378 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
379 scoped_ptr
<std::set
<std::string
>> words(new std::set
<std::string
>);
380 LoadDictionaryFileReliably(path
, words
.get());
381 if (!words
->empty() &&
// Sanitizing against an empty set means no loaded word counts as a duplicate.
383 SanitizeWordsToAdd(std::set
<std::string
>(), words
.get())) {
384 SaveDictionaryFileReliably(path
, *words
);
386 SpellCheckHostMetrics::RecordCustomWordCountStats(words
->size());
// Applies |dictionary_change| to the dictionary file at |path| on the FILE
// thread: reloads the words from disk, inserts the words to add, and saves
// the result minus the words to remove.
// NOTE(review): the statement controlled by the empty() check is not visible
// in this extract (presumably an early return) -- confirm against the full
// file.
391 void SpellcheckCustomDictionary::UpdateDictionaryFile(
392 scoped_ptr
<Change
> dictionary_change
,
393 const base::FilePath
& path
) {
394 DCHECK_CURRENTLY_ON(BrowserThread::FILE);
395 DCHECK(dictionary_change
);
397 if (dictionary_change
->empty())
400 std::set
<std::string
> custom_words
;
// Start from what is on disk, not from the in-memory set on the UI thread.
401 LoadDictionaryFileReliably(path
, &custom_words
);
404 custom_words
.insert(dictionary_change
->to_add().begin(),
405 dictionary_change
->to_add().end());
407 // Remove words and save the remainder.
408 SaveDictionaryFileReliably(path
,
409 base::STLSetDifference
<std::set
<std::string
>>(
410 custom_words
, dictionary_change
->to_remove()));
// Receives the loaded words on the UI thread: sanitizes them against the
// current words, applies and syncs the resulting change, then tells observers
// the dictionary has loaded. The change is not passed to Save() here.
// NOTE(review): a line between Sync() and the observer notification is not
// visible in this extract -- confirm against the full file.
413 void SpellcheckCustomDictionary::OnLoaded(
414 scoped_ptr
<std::set
<std::string
>> custom_words
) {
415 DCHECK_CURRENTLY_ON(BrowserThread::UI
);
416 DCHECK(custom_words
);
417 Change dictionary_change
;
418 dictionary_change
.AddWords(*custom_words
);
419 dictionary_change
.Sanitize(GetWords());
420 Apply(dictionary_change
);
421 Sync(dictionary_change
);
423 FOR_EACH_OBSERVER(Observer
, observers_
, OnCustomDictionaryLoaded());
426 void SpellcheckCustomDictionary::Apply(const Change
& dictionary_change
) {
427 DCHECK_CURRENTLY_ON(BrowserThread::UI
);
428 if (!dictionary_change
.to_add().empty()) {
429 words_
.insert(dictionary_change
.to_add().begin(),
430 dictionary_change
.to_add().end());
432 if (!dictionary_change
.to_remove().empty()) {
433 std::set
<std::string
> updated_words
=
434 base::STLSetDifference
<std::set
<std::string
>>(
435 words_
, dictionary_change
.to_remove());
436 std::swap(words_
, updated_words
);
440 void SpellcheckCustomDictionary::Save(scoped_ptr
<Change
> dictionary_change
) {
441 DCHECK_CURRENTLY_ON(BrowserThread::UI
);
442 BrowserThread::PostTask(
443 BrowserThread::FILE, FROM_HERE
,
444 base::Bind(&SpellcheckCustomDictionary::UpdateDictionaryFile
,
445 base::Passed(&dictionary_change
), custom_dictionary_path_
));
// Sends |dictionary_change| to the sync processor: ACTION_ADD entries for up
// to |upload_size| of the words to add, and ACTION_DELETE entries for every
// word to remove. The processor's result is stored in |error|. Afterwards,
// syncing is stopped if |words_| holds more than
// MAX_SYNCABLE_DICTIONARY_WORDS entries.
// NOTE(review): many lines are not visible in this extract -- the statement
// after the first `if` (presumably an early return of |error|), parts of the
// |max_upload_size| and |upload_size| expressions, the declaration of |i|,
// the first argument of the ACTION_DELETE SyncChange, any check of |error|
// after processing, and the final return. Confirm against the full file.
448 syncer::SyncError
SpellcheckCustomDictionary::Sync(
449 const Change
& dictionary_change
) {
450 DCHECK_CURRENTLY_ON(BrowserThread::UI
);
451 syncer::SyncError error
;
452 if (!IsSyncing() || dictionary_change
.empty())
455 // The number of words on the sync server should not exceed the limits.
// Local word count minus the number of words being added by this change.
456 int server_size
= static_cast<int>(words_
.size()) -
457 static_cast<int>(dictionary_change
.to_add().size());
458 int max_upload_size
= std::max(
461 chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS
) -
463 int upload_size
= std::min(
464 static_cast<int>(dictionary_change
.to_add().size()),
467 syncer::SyncChangeList sync_change_list
;
// Additions are capped at |upload_size|; removals below are not capped.
470 for (auto it
= dictionary_change
.to_add().begin();
471 it
!= dictionary_change
.to_add().end() && i
< upload_size
; ++it
, ++i
) {
472 const std::string
& word
= *it
;
473 sync_pb::EntitySpecifics specifics
;
474 specifics
.mutable_dictionary()->set_word(word
);
475 sync_change_list
.push_back(syncer::SyncChange(
476 FROM_HERE
, syncer::SyncChange::ACTION_ADD
,
477 syncer::SyncData::CreateLocalData(word
, word
, specifics
)));
480 for (const std::string
& word
: dictionary_change
.to_remove()) {
481 sync_pb::EntitySpecifics specifics
;
482 specifics
.mutable_dictionary()->set_word(word
);
483 sync_change_list
.push_back(syncer::SyncChange(
485 syncer::SyncChange::ACTION_DELETE
,
486 syncer::SyncData::CreateLocalData(word
, word
, specifics
)));
489 // Send the changes to the sync processor.
490 error
= sync_processor_
->ProcessSyncChanges(FROM_HERE
, sync_change_list
);
494 // Turn off syncing of this dictionary if the server already has the maximum
// (The comment above is cut off in this extract.)
496 if (words_
.size() > chrome::spellcheck_common::MAX_SYNCABLE_DICTIONARY_WORDS
)
497 StopSyncing(syncer::DICTIONARY
);
// Calls OnCustomDictionaryChanged(dictionary_change) on observers via
// FOR_EACH_OBSERVER. The preceding check tests whether the dictionary is not
// yet loaded or the change is empty.
// NOTE(review): the statement controlled by the `if` (presumably an early
// return) and the observer-list argument of FOR_EACH_OBSERVER are not visible
// in this extract -- confirm against the full file.
502 void SpellcheckCustomDictionary::Notify(const Change
& dictionary_change
) {
503 DCHECK_CURRENTLY_ON(BrowserThread::UI
);
504 if (!IsLoaded() || dictionary_change
.empty())
506 FOR_EACH_OBSERVER(Observer
,
508 OnCustomDictionaryChanged(dictionary_change
));