Add more checks to investigate SupervisedUserPrefStore crash at startup.
[chromium-blink-merge.git] / chrome / browser / devtools / devtools_file_system_indexer.cc
blobde03d6899cdbe3df6d1298315ce287be19f255b1
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/devtools/devtools_file_system_indexer.h"
7 #include <iterator>
9 #include "base/bind.h"
10 #include "base/callback.h"
11 #include "base/files/file_enumerator.h"
12 #include "base/files/file_util.h"
13 #include "base/files/file_util_proxy.h"
14 #include "base/lazy_instance.h"
15 #include "base/logging.h"
16 #include "base/stl_util.h"
17 #include "base/strings/utf_string_conversions.h"
18 #include "content/public/browser/browser_thread.h"
20 using base::Bind;
21 using base::Callback;
22 using base::FileEnumerator;
23 using base::FilePath;
24 using base::Time;
25 using base::TimeDelta;
26 using base::TimeTicks;
27 using content::BrowserThread;
28 using std::map;
29 using std::set;
30 using std::string;
31 using std::vector;
33 namespace {
35 typedef int32 Trigram;
36 typedef char TrigramChar;
37 typedef uint16 FileId;
39 const int kMinTimeoutBetweenWorkedNitification = 200;
40 // Trigram characters include all ASCII printable characters (32-126) except for
41 // the capital letters, because the index is case insensitive.
42 const size_t kTrigramCharacterCount = 126 - 'Z' - 1 + 'A' - ' ' + 1;
43 const size_t kTrigramCount =
44 kTrigramCharacterCount * kTrigramCharacterCount * kTrigramCharacterCount;
45 const int kMaxReadLength = 10 * 1024;
46 const TrigramChar kUndefinedTrigramChar = -1;
47 const TrigramChar kBinaryTrigramChar = -2;
48 const Trigram kUndefinedTrigram = -1;
50 class Index {
51 public:
52 Index();
53 Time LastModifiedTimeForFile(const FilePath& file_path);
54 void SetTrigramsForFile(const FilePath& file_path,
55 const vector<Trigram>& index,
56 const Time& time);
57 vector<FilePath> Search(string query);
58 void PrintStats();
59 void NormalizeVectors();
61 private:
62 ~Index();
64 FileId GetFileId(const FilePath& file_path);
66 typedef map<FilePath, FileId> FileIdsMap;
67 FileIdsMap file_ids_;
68 FileId last_file_id_;
69 // The index in this vector is the trigram id.
70 vector<vector<FileId> > index_;
71 typedef map<FilePath, Time> IndexedFilesMap;
72 IndexedFilesMap index_times_;
73 vector<bool> is_normalized_;
75 DISALLOW_COPY_AND_ASSIGN(Index);
78 base::LazyInstance<Index>::Leaky g_trigram_index = LAZY_INSTANCE_INITIALIZER;
80 TrigramChar TrigramCharForChar(char c) {
81 static TrigramChar* trigram_chars = nullptr;
82 if (!trigram_chars) {
83 trigram_chars = new TrigramChar[256];
84 for (size_t i = 0; i < 256; ++i) {
85 if (i > 127) {
86 trigram_chars[i] = kUndefinedTrigramChar;
87 continue;
89 char ch = static_cast<char>(i);
90 if (ch == '\t')
91 ch = ' ';
92 if (ch >= 'A' && ch <= 'Z')
93 ch = ch - 'A' + 'a';
95 bool is_binary_char = ch < 9 || (ch >= 14 && ch < 32) || ch == 127;
96 if (is_binary_char) {
97 trigram_chars[i] = kBinaryTrigramChar;
98 continue;
101 if (ch < ' ') {
102 trigram_chars[i] = kUndefinedTrigramChar;
103 continue;
106 if (ch >= 'Z')
107 ch = ch - 'Z' - 1 + 'A';
108 ch -= ' ';
109 char signed_trigram_count = static_cast<char>(kTrigramCharacterCount);
110 CHECK(ch >= 0 && ch < signed_trigram_count);
111 trigram_chars[i] = ch;
114 unsigned char uc = static_cast<unsigned char>(c);
115 return trigram_chars[uc];
118 Trigram TrigramAtIndex(const vector<TrigramChar>& trigram_chars, size_t index) {
119 static int kTrigramCharacterCountSquared =
120 kTrigramCharacterCount * kTrigramCharacterCount;
121 if (trigram_chars[index] == kUndefinedTrigramChar ||
122 trigram_chars[index + 1] == kUndefinedTrigramChar ||
123 trigram_chars[index + 2] == kUndefinedTrigramChar)
124 return kUndefinedTrigram;
125 Trigram trigram = kTrigramCharacterCountSquared * trigram_chars[index] +
126 kTrigramCharacterCount * trigram_chars[index + 1] +
127 trigram_chars[index + 2];
128 return trigram;
131 Index::Index() : last_file_id_(0) {
132 index_.resize(kTrigramCount);
133 is_normalized_.resize(kTrigramCount);
134 std::fill(is_normalized_.begin(), is_normalized_.end(), true);
137 Index::~Index() {}
139 Time Index::LastModifiedTimeForFile(const FilePath& file_path) {
140 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
141 Time last_modified_time;
142 if (index_times_.find(file_path) != index_times_.end())
143 last_modified_time = index_times_[file_path];
144 return last_modified_time;
147 void Index::SetTrigramsForFile(const FilePath& file_path,
148 const vector<Trigram>& index,
149 const Time& time) {
150 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
151 FileId file_id = GetFileId(file_path);
152 vector<Trigram>::const_iterator it = index.begin();
153 for (; it != index.end(); ++it) {
154 Trigram trigram = *it;
155 index_[trigram].push_back(file_id);
156 is_normalized_[trigram] = false;
158 index_times_[file_path] = time;
161 vector<FilePath> Index::Search(string query) {
162 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
163 const char* data = query.c_str();
164 vector<TrigramChar> trigram_chars;
165 trigram_chars.reserve(query.size());
166 for (size_t i = 0; i < query.size(); ++i) {
167 TrigramChar trigram_char = TrigramCharForChar(data[i]);
168 if (trigram_char == kBinaryTrigramChar)
169 trigram_char = kUndefinedTrigramChar;
170 trigram_chars.push_back(trigram_char);
172 vector<Trigram> trigrams;
173 for (size_t i = 0; i + 2 < query.size(); ++i) {
174 Trigram trigram = TrigramAtIndex(trigram_chars, i);
175 if (trigram != kUndefinedTrigram)
176 trigrams.push_back(trigram);
178 set<FileId> file_ids;
179 bool first = true;
180 vector<Trigram>::const_iterator it = trigrams.begin();
181 for (; it != trigrams.end(); ++it) {
182 Trigram trigram = *it;
183 if (first) {
184 std::copy(index_[trigram].begin(),
185 index_[trigram].end(),
186 std::inserter(file_ids, file_ids.begin()));
187 first = false;
188 continue;
190 set<FileId> intersection = base::STLSetIntersection<set<FileId> >(
191 file_ids, index_[trigram]);
192 file_ids.swap(intersection);
194 vector<FilePath> result;
195 FileIdsMap::const_iterator ids_it = file_ids_.begin();
196 for (; ids_it != file_ids_.end(); ++ids_it) {
197 if (trigrams.size() == 0 ||
198 file_ids.find(ids_it->second) != file_ids.end()) {
199 result.push_back(ids_it->first);
202 return result;
205 FileId Index::GetFileId(const FilePath& file_path) {
206 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
207 string file_path_str = file_path.AsUTF8Unsafe();
208 if (file_ids_.find(file_path) != file_ids_.end())
209 return file_ids_[file_path];
210 file_ids_[file_path] = ++last_file_id_;
211 return last_file_id_;
214 void Index::NormalizeVectors() {
215 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
216 for (size_t i = 0; i < kTrigramCount; ++i) {
217 if (!is_normalized_[i]) {
218 std::sort(index_[i].begin(), index_[i].end());
219 if (index_[i].capacity() > index_[i].size())
220 vector<FileId>(index_[i]).swap(index_[i]);
221 is_normalized_[i] = true;
226 void Index::PrintStats() {
227 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
228 LOG(ERROR) << "Index stats:";
229 size_t size = 0;
230 size_t maxSize = 0;
231 size_t capacity = 0;
232 for (size_t i = 0; i < kTrigramCount; ++i) {
233 if (index_[i].size() > maxSize)
234 maxSize = index_[i].size();
235 size += index_[i].size();
236 capacity += index_[i].capacity();
238 LOG(ERROR) << " - total trigram count: " << size;
239 LOG(ERROR) << " - max file count per trigram: " << maxSize;
240 LOG(ERROR) << " - total vectors capacity " << capacity;
241 size_t total_index_size =
242 capacity * sizeof(FileId) + sizeof(vector<FileId>) * kTrigramCount;
243 LOG(ERROR) << " - estimated total index size " << total_index_size;
246 typedef Callback<void(bool, const vector<bool>&)> IndexerCallback;
248 } // namespace
250 DevToolsFileSystemIndexer::FileSystemIndexingJob::FileSystemIndexingJob(
251 const FilePath& file_system_path,
252 const TotalWorkCallback& total_work_callback,
253 const WorkedCallback& worked_callback,
254 const DoneCallback& done_callback)
255 : file_system_path_(file_system_path),
256 total_work_callback_(total_work_callback),
257 worked_callback_(worked_callback),
258 done_callback_(done_callback),
259 current_file_(BrowserThread::GetMessageLoopProxyForThread(
260 BrowserThread::FILE).get()),
261 files_indexed_(0),
262 stopped_(false) {
263 current_trigrams_set_.resize(kTrigramCount);
264 current_trigrams_.reserve(kTrigramCount);
267 DevToolsFileSystemIndexer::FileSystemIndexingJob::~FileSystemIndexingJob() {}
269 void DevToolsFileSystemIndexer::FileSystemIndexingJob::Start() {
270 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
271 BrowserThread::PostTask(
272 BrowserThread::FILE,
273 FROM_HERE,
274 Bind(&FileSystemIndexingJob::CollectFilesToIndex, this));
277 void DevToolsFileSystemIndexer::FileSystemIndexingJob::Stop() {
278 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
279 BrowserThread::PostTask(BrowserThread::FILE,
280 FROM_HERE,
281 Bind(&FileSystemIndexingJob::StopOnFileThread, this));
284 void DevToolsFileSystemIndexer::FileSystemIndexingJob::StopOnFileThread() {
285 stopped_ = true;
288 void DevToolsFileSystemIndexer::FileSystemIndexingJob::CollectFilesToIndex() {
289 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
290 if (stopped_)
291 return;
292 if (!file_enumerator_) {
293 file_enumerator_.reset(
294 new FileEnumerator(file_system_path_, true, FileEnumerator::FILES));
296 FilePath file_path = file_enumerator_->Next();
297 if (file_path.empty()) {
298 BrowserThread::PostTask(
299 BrowserThread::UI,
300 FROM_HERE,
301 Bind(total_work_callback_, file_path_times_.size()));
302 indexing_it_ = file_path_times_.begin();
303 IndexFiles();
304 return;
306 Time saved_last_modified_time =
307 g_trigram_index.Get().LastModifiedTimeForFile(file_path);
308 FileEnumerator::FileInfo file_info = file_enumerator_->GetInfo();
309 Time current_last_modified_time = file_info.GetLastModifiedTime();
310 if (current_last_modified_time > saved_last_modified_time) {
311 file_path_times_[file_path] = current_last_modified_time;
313 BrowserThread::PostTask(
314 BrowserThread::FILE,
315 FROM_HERE,
316 Bind(&FileSystemIndexingJob::CollectFilesToIndex, this));
319 void DevToolsFileSystemIndexer::FileSystemIndexingJob::IndexFiles() {
320 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
321 if (stopped_)
322 return;
323 if (indexing_it_ == file_path_times_.end()) {
324 g_trigram_index.Get().NormalizeVectors();
325 BrowserThread::PostTask(BrowserThread::UI, FROM_HERE, done_callback_);
326 return;
328 FilePath file_path = indexing_it_->first;
329 current_file_.CreateOrOpen(
330 file_path,
331 base::File::FLAG_OPEN | base::File::FLAG_READ,
332 Bind(&FileSystemIndexingJob::StartFileIndexing, this));
335 void DevToolsFileSystemIndexer::FileSystemIndexingJob::StartFileIndexing(
336 base::File::Error error) {
337 if (!current_file_.IsValid()) {
338 FinishFileIndexing(false);
339 return;
341 current_file_offset_ = 0;
342 current_trigrams_.clear();
343 std::fill(current_trigrams_set_.begin(), current_trigrams_set_.end(), false);
344 ReadFromFile();
347 void DevToolsFileSystemIndexer::FileSystemIndexingJob::ReadFromFile() {
348 if (stopped_) {
349 CloseFile();
350 return;
352 current_file_.Read(current_file_offset_, kMaxReadLength,
353 Bind(&FileSystemIndexingJob::OnRead, this));
356 void DevToolsFileSystemIndexer::FileSystemIndexingJob::OnRead(
357 base::File::Error error,
358 const char* data,
359 int bytes_read) {
360 if (error != base::File::FILE_OK) {
361 FinishFileIndexing(false);
362 return;
365 if (!bytes_read || bytes_read < 3) {
366 FinishFileIndexing(true);
367 return;
370 size_t size = static_cast<size_t>(bytes_read);
371 vector<TrigramChar> trigram_chars;
372 trigram_chars.reserve(size);
373 for (size_t i = 0; i < size; ++i) {
374 TrigramChar trigram_char = TrigramCharForChar(data[i]);
375 if (trigram_char == kBinaryTrigramChar) {
376 current_trigrams_.clear();
377 FinishFileIndexing(true);
378 return;
380 trigram_chars.push_back(trigram_char);
383 for (size_t i = 0; i + 2 < size; ++i) {
384 Trigram trigram = TrigramAtIndex(trigram_chars, i);
385 if ((trigram != kUndefinedTrigram) && !current_trigrams_set_[trigram]) {
386 current_trigrams_set_[trigram] = true;
387 current_trigrams_.push_back(trigram);
390 current_file_offset_ += bytes_read - 2;
391 ReadFromFile();
394 void DevToolsFileSystemIndexer::FileSystemIndexingJob::FinishFileIndexing(
395 bool success) {
396 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
397 CloseFile();
398 if (success) {
399 FilePath file_path = indexing_it_->first;
400 g_trigram_index.Get().SetTrigramsForFile(
401 file_path, current_trigrams_, file_path_times_[file_path]);
403 ReportWorked();
404 ++indexing_it_;
405 IndexFiles();
408 void DevToolsFileSystemIndexer::FileSystemIndexingJob::CloseFile() {
409 if (current_file_.IsValid())
410 current_file_.Close(Bind(&FileSystemIndexingJob::CloseCallback, this));
413 void DevToolsFileSystemIndexer::FileSystemIndexingJob::CloseCallback(
414 base::File::Error error) {}
416 void DevToolsFileSystemIndexer::FileSystemIndexingJob::ReportWorked() {
417 TimeTicks current_time = TimeTicks::Now();
418 bool should_send_worked_nitification = true;
419 if (!last_worked_notification_time_.is_null()) {
420 TimeDelta delta = current_time - last_worked_notification_time_;
421 if (delta.InMilliseconds() < kMinTimeoutBetweenWorkedNitification)
422 should_send_worked_nitification = false;
424 ++files_indexed_;
425 if (should_send_worked_nitification) {
426 last_worked_notification_time_ = current_time;
427 BrowserThread::PostTask(
428 BrowserThread::UI, FROM_HERE, Bind(worked_callback_, files_indexed_));
429 files_indexed_ = 0;
433 DevToolsFileSystemIndexer::DevToolsFileSystemIndexer() {
436 DevToolsFileSystemIndexer::~DevToolsFileSystemIndexer() {}
438 scoped_refptr<DevToolsFileSystemIndexer::FileSystemIndexingJob>
439 DevToolsFileSystemIndexer::IndexPath(
440 const string& file_system_path,
441 const TotalWorkCallback& total_work_callback,
442 const WorkedCallback& worked_callback,
443 const DoneCallback& done_callback) {
444 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
445 scoped_refptr<FileSystemIndexingJob> indexing_job =
446 new FileSystemIndexingJob(FilePath::FromUTF8Unsafe(file_system_path),
447 total_work_callback,
448 worked_callback,
449 done_callback);
450 indexing_job->Start();
451 return indexing_job;
454 void DevToolsFileSystemIndexer::SearchInPath(const string& file_system_path,
455 const string& query,
456 const SearchCallback& callback) {
457 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
458 BrowserThread::PostTask(
459 BrowserThread::FILE,
460 FROM_HERE,
461 Bind(&DevToolsFileSystemIndexer::SearchInPathOnFileThread,
462 this,
463 file_system_path,
464 query,
465 callback));
468 void DevToolsFileSystemIndexer::SearchInPathOnFileThread(
469 const string& file_system_path,
470 const string& query,
471 const SearchCallback& callback) {
472 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
473 vector<FilePath> file_paths = g_trigram_index.Get().Search(query);
474 vector<string> result;
475 FilePath path = FilePath::FromUTF8Unsafe(file_system_path);
476 vector<FilePath>::const_iterator it = file_paths.begin();
477 for (; it != file_paths.end(); ++it) {
478 if (path.IsParent(*it))
479 result.push_back(it->AsUTF8Unsafe());
481 BrowserThread::PostTask(BrowserThread::UI, FROM_HERE, Bind(callback, result));