Introduce ProfilerMetricsProvider
[chromium-blink-merge.git] / chrome / browser / devtools / devtools_file_system_indexer.cc
blob6f7f80daec65c6b958deaf9184479e9e3527befe
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/devtools/devtools_file_system_indexer.h"
7 #include <iterator>
9 #include "base/bind.h"
10 #include "base/callback.h"
11 #include "base/file_util.h"
12 #include "base/files/file_enumerator.h"
13 #include "base/files/file_util_proxy.h"
14 #include "base/lazy_instance.h"
15 #include "base/logging.h"
16 #include "base/stl_util.h"
17 #include "base/strings/utf_string_conversions.h"
18 #include "content/public/browser/browser_thread.h"
20 using base::Bind;
21 using base::Callback;
22 using base::FileEnumerator;
23 using base::FilePath;
24 using base::Time;
25 using base::TimeDelta;
26 using base::TimeTicks;
27 using content::BrowserThread;
28 using std::map;
29 using std::set;
30 using std::string;
31 using std::vector;
33 namespace {
35 typedef int32 Trigram;
36 typedef char TrigramChar;
37 typedef uint16 FileId;
39 const int kMinTimeoutBetweenWorkedNitification = 200;
40 // Trigram characters include all ASCII printable characters (32-126) except for
41 // the capital letters, because the index is case insensitive.
42 const size_t kTrigramCharacterCount = 126 - 'Z' - 1 + 'A' - ' ' + 1;
43 const size_t kTrigramCount =
44 kTrigramCharacterCount * kTrigramCharacterCount * kTrigramCharacterCount;
45 const int kMaxReadLength = 10 * 1024;
46 const TrigramChar kUndefinedTrigramChar = -1;
47 const Trigram kUndefinedTrigram = -1;
49 base::LazyInstance<vector<bool> >::Leaky g_is_binary_char =
50 LAZY_INSTANCE_INITIALIZER;
52 base::LazyInstance<vector<TrigramChar> >::Leaky g_trigram_chars =
53 LAZY_INSTANCE_INITIALIZER;
55 class Index {
56 public:
57 Index();
58 Time LastModifiedTimeForFile(const FilePath& file_path);
59 void SetTrigramsForFile(const FilePath& file_path,
60 const vector<Trigram>& index,
61 const Time& time);
62 vector<FilePath> Search(string query);
63 void PrintStats();
64 void NormalizeVectors();
66 private:
67 ~Index();
69 FileId GetFileId(const FilePath& file_path);
71 typedef map<FilePath, FileId> FileIdsMap;
72 FileIdsMap file_ids_;
73 FileId last_file_id_;
74 // The index in this vector is the trigram id.
75 vector<vector<FileId> > index_;
76 typedef map<FilePath, Time> IndexedFilesMap;
77 IndexedFilesMap index_times_;
78 vector<bool> is_normalized_;
80 DISALLOW_COPY_AND_ASSIGN(Index);
83 base::LazyInstance<Index>::Leaky g_trigram_index = LAZY_INSTANCE_INITIALIZER;
85 void InitIsBinaryCharMap() {
86 for (size_t i = 0; i < 256; ++i) {
87 unsigned char ch = i;
88 bool is_binary_char = ch < 9 || (ch >= 14 && ch < 32) || ch == 127;
89 g_is_binary_char.Get().push_back(is_binary_char);
93 bool IsBinaryChar(char c) {
94 unsigned char uc = static_cast<unsigned char>(c);
95 return g_is_binary_char.Get()[uc];
98 void InitTrigramCharsMap() {
99 for (size_t i = 0; i < 256; ++i) {
100 if (i > 127) {
101 g_trigram_chars.Get().push_back(kUndefinedTrigramChar);
102 continue;
104 char ch = i;
105 if (ch == '\t')
106 ch = ' ';
107 if (ch >= 'A' && ch <= 'Z')
108 ch = ch - 'A' + 'a';
109 if ((IsBinaryChar(ch)) || (ch < ' ')) {
110 g_trigram_chars.Get().push_back(kUndefinedTrigramChar);
111 continue;
114 if (ch >= 'Z')
115 ch = ch - 'Z' - 1 + 'A';
116 ch -= ' ';
117 char signed_trigram_char_count = static_cast<char>(kTrigramCharacterCount);
118 CHECK(ch >= 0 && ch < signed_trigram_char_count);
119 g_trigram_chars.Get().push_back(ch);
123 TrigramChar TrigramCharForChar(char c) {
124 unsigned char uc = static_cast<unsigned char>(c);
125 return g_trigram_chars.Get()[uc];
128 Trigram TrigramAtIndex(const vector<TrigramChar>& trigram_chars, size_t index) {
129 static int kTrigramCharacterCountSquared =
130 kTrigramCharacterCount * kTrigramCharacterCount;
131 if (trigram_chars[index] == kUndefinedTrigramChar ||
132 trigram_chars[index + 1] == kUndefinedTrigramChar ||
133 trigram_chars[index + 2] == kUndefinedTrigramChar)
134 return kUndefinedTrigram;
135 Trigram trigram = kTrigramCharacterCountSquared * trigram_chars[index] +
136 kTrigramCharacterCount * trigram_chars[index + 1] +
137 trigram_chars[index + 2];
138 return trigram;
141 Index::Index() : last_file_id_(0) {
142 index_.resize(kTrigramCount);
143 is_normalized_.resize(kTrigramCount);
144 std::fill(is_normalized_.begin(), is_normalized_.end(), true);
147 Index::~Index() {}
149 Time Index::LastModifiedTimeForFile(const FilePath& file_path) {
150 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
151 Time last_modified_time;
152 if (index_times_.find(file_path) != index_times_.end())
153 last_modified_time = index_times_[file_path];
154 return last_modified_time;
157 void Index::SetTrigramsForFile(const FilePath& file_path,
158 const vector<Trigram>& index,
159 const Time& time) {
160 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
161 FileId file_id = GetFileId(file_path);
162 vector<Trigram>::const_iterator it = index.begin();
163 for (; it != index.end(); ++it) {
164 Trigram trigram = *it;
165 index_[trigram].push_back(file_id);
166 is_normalized_[trigram] = false;
168 index_times_[file_path] = time;
171 vector<FilePath> Index::Search(string query) {
172 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
173 const char* data = query.c_str();
174 vector<TrigramChar> trigram_chars;
175 trigram_chars.reserve(query.size());
176 for (size_t i = 0; i < query.size(); ++i)
177 trigram_chars.push_back(TrigramCharForChar(data[i]));
178 vector<Trigram> trigrams;
179 for (size_t i = 0; i + 2 < query.size(); ++i) {
180 Trigram trigram = TrigramAtIndex(trigram_chars, i);
181 if (trigram != kUndefinedTrigram)
182 trigrams.push_back(trigram);
184 set<FileId> file_ids;
185 bool first = true;
186 vector<Trigram>::const_iterator it = trigrams.begin();
187 for (; it != trigrams.end(); ++it) {
188 Trigram trigram = *it;
189 if (first) {
190 std::copy(index_[trigram].begin(),
191 index_[trigram].end(),
192 std::inserter(file_ids, file_ids.begin()));
193 first = false;
194 continue;
196 set<FileId> intersection = base::STLSetIntersection<set<FileId> >(
197 file_ids, index_[trigram]);
198 file_ids.swap(intersection);
200 vector<FilePath> result;
201 FileIdsMap::const_iterator ids_it = file_ids_.begin();
202 for (; ids_it != file_ids_.end(); ++ids_it) {
203 if (trigrams.size() == 0 ||
204 file_ids.find(ids_it->second) != file_ids.end()) {
205 result.push_back(ids_it->first);
208 return result;
211 FileId Index::GetFileId(const FilePath& file_path) {
212 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
213 string file_path_str = file_path.AsUTF8Unsafe();
214 if (file_ids_.find(file_path) != file_ids_.end())
215 return file_ids_[file_path];
216 file_ids_[file_path] = ++last_file_id_;
217 return last_file_id_;
220 void Index::NormalizeVectors() {
221 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
222 for (size_t i = 0; i < kTrigramCount; ++i) {
223 if (!is_normalized_[i]) {
224 std::sort(index_[i].begin(), index_[i].end());
225 if (index_[i].capacity() > index_[i].size())
226 vector<FileId>(index_[i]).swap(index_[i]);
227 is_normalized_[i] = true;
232 void Index::PrintStats() {
233 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
234 LOG(ERROR) << "Index stats:";
235 size_t size = 0;
236 size_t maxSize = 0;
237 size_t capacity = 0;
238 for (size_t i = 0; i < kTrigramCount; ++i) {
239 if (index_[i].size() > maxSize)
240 maxSize = index_[i].size();
241 size += index_[i].size();
242 capacity += index_[i].capacity();
244 LOG(ERROR) << " - total trigram count: " << size;
245 LOG(ERROR) << " - max file count per trigram: " << maxSize;
246 LOG(ERROR) << " - total vectors capacity " << capacity;
247 size_t total_index_size =
248 capacity * sizeof(FileId) + sizeof(vector<FileId>) * kTrigramCount;
249 LOG(ERROR) << " - estimated total index size " << total_index_size;
252 typedef Callback<void(bool, const vector<bool>&)> IndexerCallback;
254 } // namespace
256 DevToolsFileSystemIndexer::FileSystemIndexingJob::FileSystemIndexingJob(
257 const FilePath& file_system_path,
258 const TotalWorkCallback& total_work_callback,
259 const WorkedCallback& worked_callback,
260 const DoneCallback& done_callback)
261 : file_system_path_(file_system_path),
262 total_work_callback_(total_work_callback),
263 worked_callback_(worked_callback),
264 done_callback_(done_callback),
265 current_file_(BrowserThread::GetMessageLoopProxyForThread(
266 BrowserThread::FILE).get()),
267 files_indexed_(0),
268 stopped_(false) {
269 current_trigrams_set_.resize(kTrigramCount);
270 current_trigrams_.reserve(kTrigramCount);
273 DevToolsFileSystemIndexer::FileSystemIndexingJob::~FileSystemIndexingJob() {}
275 void DevToolsFileSystemIndexer::FileSystemIndexingJob::Start() {
276 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
277 BrowserThread::PostTask(
278 BrowserThread::FILE,
279 FROM_HERE,
280 Bind(&FileSystemIndexingJob::CollectFilesToIndex, this));
283 void DevToolsFileSystemIndexer::FileSystemIndexingJob::Stop() {
284 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
285 BrowserThread::PostTask(BrowserThread::FILE,
286 FROM_HERE,
287 Bind(&FileSystemIndexingJob::StopOnFileThread, this));
290 void DevToolsFileSystemIndexer::FileSystemIndexingJob::StopOnFileThread() {
291 stopped_ = true;
294 void DevToolsFileSystemIndexer::FileSystemIndexingJob::CollectFilesToIndex() {
295 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
296 if (stopped_)
297 return;
298 if (!file_enumerator_) {
299 file_enumerator_.reset(
300 new FileEnumerator(file_system_path_, true, FileEnumerator::FILES));
302 FilePath file_path = file_enumerator_->Next();
303 if (file_path.empty()) {
304 BrowserThread::PostTask(
305 BrowserThread::UI,
306 FROM_HERE,
307 Bind(total_work_callback_, file_path_times_.size()));
308 indexing_it_ = file_path_times_.begin();
309 IndexFiles();
310 return;
312 Time saved_last_modified_time =
313 g_trigram_index.Get().LastModifiedTimeForFile(file_path);
314 FileEnumerator::FileInfo file_info = file_enumerator_->GetInfo();
315 Time current_last_modified_time = file_info.GetLastModifiedTime();
316 if (current_last_modified_time > saved_last_modified_time) {
317 file_path_times_[file_path] = current_last_modified_time;
319 BrowserThread::PostTask(
320 BrowserThread::FILE,
321 FROM_HERE,
322 Bind(&FileSystemIndexingJob::CollectFilesToIndex, this));
325 void DevToolsFileSystemIndexer::FileSystemIndexingJob::IndexFiles() {
326 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
327 if (stopped_)
328 return;
329 if (indexing_it_ == file_path_times_.end()) {
330 g_trigram_index.Get().NormalizeVectors();
331 BrowserThread::PostTask(BrowserThread::UI, FROM_HERE, done_callback_);
332 return;
334 FilePath file_path = indexing_it_->first;
335 current_file_.CreateOrOpen(
336 file_path,
337 base::File::FLAG_OPEN | base::File::FLAG_READ,
338 Bind(&FileSystemIndexingJob::StartFileIndexing, this));
341 void DevToolsFileSystemIndexer::FileSystemIndexingJob::StartFileIndexing(
342 base::File::Error error) {
343 if (!current_file_.IsValid()) {
344 FinishFileIndexing(false);
345 return;
347 current_file_offset_ = 0;
348 current_trigrams_.clear();
349 std::fill(current_trigrams_set_.begin(), current_trigrams_set_.end(), false);
350 ReadFromFile();
353 void DevToolsFileSystemIndexer::FileSystemIndexingJob::ReadFromFile() {
354 if (stopped_) {
355 CloseFile();
356 return;
358 current_file_.Read(current_file_offset_, kMaxReadLength,
359 Bind(&FileSystemIndexingJob::OnRead, this));
362 void DevToolsFileSystemIndexer::FileSystemIndexingJob::OnRead(
363 base::File::Error error,
364 const char* data,
365 int bytes_read) {
366 if (error != base::File::FILE_OK) {
367 FinishFileIndexing(false);
368 return;
371 if (!bytes_read || bytes_read < 3) {
372 FinishFileIndexing(true);
373 return;
376 size_t size = static_cast<size_t>(bytes_read);
377 vector<TrigramChar> trigram_chars;
378 trigram_chars.reserve(size);
379 for (size_t i = 0; i < size; ++i) {
380 if (IsBinaryChar(data[i])) {
381 current_trigrams_.clear();
382 FinishFileIndexing(true);
383 return;
385 trigram_chars.push_back(TrigramCharForChar(data[i]));
388 for (size_t i = 0; i + 2 < size; ++i) {
389 Trigram trigram = TrigramAtIndex(trigram_chars, i);
390 if ((trigram != kUndefinedTrigram) && !current_trigrams_set_[trigram]) {
391 current_trigrams_set_[trigram] = true;
392 current_trigrams_.push_back(trigram);
395 current_file_offset_ += bytes_read - 2;
396 ReadFromFile();
399 void DevToolsFileSystemIndexer::FileSystemIndexingJob::FinishFileIndexing(
400 bool success) {
401 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
402 CloseFile();
403 if (success) {
404 FilePath file_path = indexing_it_->first;
405 g_trigram_index.Get().SetTrigramsForFile(
406 file_path, current_trigrams_, file_path_times_[file_path]);
408 ReportWorked();
409 ++indexing_it_;
410 IndexFiles();
413 void DevToolsFileSystemIndexer::FileSystemIndexingJob::CloseFile() {
414 if (current_file_.IsValid())
415 current_file_.Close(Bind(&FileSystemIndexingJob::CloseCallback, this));
418 void DevToolsFileSystemIndexer::FileSystemIndexingJob::CloseCallback(
419 base::File::Error error) {}
421 void DevToolsFileSystemIndexer::FileSystemIndexingJob::ReportWorked() {
422 TimeTicks current_time = TimeTicks::Now();
423 bool should_send_worked_nitification = true;
424 if (!last_worked_notification_time_.is_null()) {
425 TimeDelta delta = current_time - last_worked_notification_time_;
426 if (delta.InMilliseconds() < kMinTimeoutBetweenWorkedNitification)
427 should_send_worked_nitification = false;
429 ++files_indexed_;
430 if (should_send_worked_nitification) {
431 last_worked_notification_time_ = current_time;
432 BrowserThread::PostTask(
433 BrowserThread::UI, FROM_HERE, Bind(worked_callback_, files_indexed_));
434 files_indexed_ = 0;
438 DevToolsFileSystemIndexer::DevToolsFileSystemIndexer() {
439 static bool maps_initialized = false;
440 if (!maps_initialized) {
441 InitIsBinaryCharMap();
442 InitTrigramCharsMap();
443 maps_initialized = true;
447 DevToolsFileSystemIndexer::~DevToolsFileSystemIndexer() {}
449 scoped_refptr<DevToolsFileSystemIndexer::FileSystemIndexingJob>
450 DevToolsFileSystemIndexer::IndexPath(
451 const string& file_system_path,
452 const TotalWorkCallback& total_work_callback,
453 const WorkedCallback& worked_callback,
454 const DoneCallback& done_callback) {
455 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
456 scoped_refptr<FileSystemIndexingJob> indexing_job =
457 new FileSystemIndexingJob(FilePath::FromUTF8Unsafe(file_system_path),
458 total_work_callback,
459 worked_callback,
460 done_callback);
461 indexing_job->Start();
462 return indexing_job;
465 void DevToolsFileSystemIndexer::SearchInPath(const string& file_system_path,
466 const string& query,
467 const SearchCallback& callback) {
468 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
469 BrowserThread::PostTask(
470 BrowserThread::FILE,
471 FROM_HERE,
472 Bind(&DevToolsFileSystemIndexer::SearchInPathOnFileThread,
473 this,
474 file_system_path,
475 query,
476 callback));
479 void DevToolsFileSystemIndexer::SearchInPathOnFileThread(
480 const string& file_system_path,
481 const string& query,
482 const SearchCallback& callback) {
483 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
484 vector<FilePath> file_paths = g_trigram_index.Get().Search(query);
485 vector<string> result;
486 FilePath path = FilePath::FromUTF8Unsafe(file_system_path);
487 vector<FilePath>::const_iterator it = file_paths.begin();
488 for (; it != file_paths.end(); ++it) {
489 if (path.IsParent(*it))
490 result.push_back(it->AsUTF8Unsafe());
492 BrowserThread::PostTask(BrowserThread::UI, FROM_HERE, Bind(callback, result));