Disable TabDragController tests that fail with a real compositor.
[chromium-blink-merge.git] / chrome / browser / devtools / devtools_file_system_indexer.cc
blobb5826e4e4df77e5241d1b5de5f0fbfbf0f9350dc
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/devtools/devtools_file_system_indexer.h"
7 #include <iterator>
9 #include "base/bind.h"
10 #include "base/callback.h"
11 #include "base/file_util.h"
12 #include "base/files/file_enumerator.h"
13 #include "base/files/file_util_proxy.h"
14 #include "base/lazy_instance.h"
15 #include "base/logging.h"
16 #include "base/platform_file.h"
17 #include "base/strings/utf_string_conversions.h"
18 #include "content/public/browser/browser_thread.h"
20 using base::Bind;
21 using base::Callback;
22 using base::FileEnumerator;
23 using base::FilePath;
24 using base::FileUtilProxy;
25 using base::Time;
26 using base::TimeDelta;
27 using base::TimeTicks;
28 using base::PassPlatformFile;
29 using base::PlatformFile;
30 using base::PlatformFileError;
31 using content::BrowserThread;
32 using std::map;
33 using std::set;
34 using std::string;
35 using std::vector;
37 namespace {
39 typedef int32 Trigram;
40 typedef char TrigramChar;
41 typedef uint16 FileId;
43 const int kMinTimeoutBetweenWorkedNitification = 200;
44 // Trigram characters include all ASCII printable characters (32-126) except for
45 // the capital letters, because the index is case insensitive.
46 const size_t kTrigramCharacterCount = 126 - 'Z' - 1 + 'A' - ' ' + 1;
47 const size_t kTrigramCount =
48 kTrigramCharacterCount * kTrigramCharacterCount * kTrigramCharacterCount;
49 const int kMaxReadLength = 10 * 1024;
50 const TrigramChar kUndefinedTrigramChar = -1;
51 const Trigram kUndefinedTrigram = -1;
53 base::LazyInstance<vector<bool> >::Leaky g_is_binary_char =
54 LAZY_INSTANCE_INITIALIZER;
56 base::LazyInstance<vector<TrigramChar> >::Leaky g_trigram_chars =
57 LAZY_INSTANCE_INITIALIZER;
59 class Index {
60 public:
61 Index();
62 Time LastModifiedTimeForFile(const FilePath& file_path);
63 void SetTrigramsForFile(const FilePath& file_path,
64 const vector<Trigram>& index,
65 const Time& time);
66 vector<FilePath> Search(string query);
67 void PrintStats();
68 void NormalizeVectors();
70 private:
71 ~Index();
73 FileId GetFileId(const FilePath& file_path);
75 typedef map<FilePath, FileId> FileIdsMap;
76 FileIdsMap file_ids_;
77 FileId last_file_id_;
78 // The index in this vector is the trigram id.
79 vector<vector<FileId> > index_;
80 typedef map<FilePath, Time> IndexedFilesMap;
81 IndexedFilesMap index_times_;
82 vector<bool> is_normalized_;
84 DISALLOW_COPY_AND_ASSIGN(Index);
87 base::LazyInstance<Index>::Leaky g_trigram_index = LAZY_INSTANCE_INITIALIZER;
89 void InitIsBinaryCharMap() {
90 for (size_t i = 0; i < 256; ++i) {
91 unsigned char ch = i;
92 bool is_binary_char = ch < 9 || (ch >= 14 && ch < 32) || ch == 127;
93 g_is_binary_char.Get().push_back(is_binary_char);
97 bool IsBinaryChar(char c) {
98 unsigned char uc = static_cast<unsigned char>(c);
99 return g_is_binary_char.Get()[uc];
102 void InitTrigramCharsMap() {
103 for (size_t i = 0; i < 256; ++i) {
104 if (i > 127) {
105 g_trigram_chars.Get().push_back(kUndefinedTrigramChar);
106 continue;
108 char ch = i;
109 if (ch == '\t')
110 ch = ' ';
111 if (ch >= 'A' && ch <= 'Z')
112 ch = ch - 'A' + 'a';
113 if ((IsBinaryChar(ch)) || (ch < ' ')) {
114 g_trigram_chars.Get().push_back(kUndefinedTrigramChar);
115 continue;
118 if (ch >= 'Z')
119 ch = ch - 'Z' - 1 + 'A';
120 ch -= ' ';
121 char signed_trigram_char_count = static_cast<char>(kTrigramCharacterCount);
122 CHECK(ch >= 0 && ch < signed_trigram_char_count);
123 g_trigram_chars.Get().push_back(ch);
127 TrigramChar TrigramCharForChar(char c) {
128 unsigned char uc = static_cast<unsigned char>(c);
129 return g_trigram_chars.Get()[uc];
132 Trigram TrigramAtIndex(const vector<TrigramChar>& trigram_chars, size_t index) {
133 static int kTrigramCharacterCountSquared =
134 kTrigramCharacterCount * kTrigramCharacterCount;
135 if (trigram_chars[index] == kUndefinedTrigramChar ||
136 trigram_chars[index + 1] == kUndefinedTrigramChar ||
137 trigram_chars[index + 2] == kUndefinedTrigramChar)
138 return kUndefinedTrigram;
139 Trigram trigram = kTrigramCharacterCountSquared * trigram_chars[index] +
140 kTrigramCharacterCount * trigram_chars[index + 1] +
141 trigram_chars[index + 2];
142 return trigram;
145 Index::Index() : last_file_id_(0) {
146 index_.resize(kTrigramCount);
147 is_normalized_.resize(kTrigramCount);
148 std::fill(is_normalized_.begin(), is_normalized_.end(), true);
151 Index::~Index() {}
153 Time Index::LastModifiedTimeForFile(const FilePath& file_path) {
154 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
155 Time last_modified_time;
156 if (index_times_.find(file_path) != index_times_.end())
157 last_modified_time = index_times_[file_path];
158 return last_modified_time;
161 void Index::SetTrigramsForFile(const FilePath& file_path,
162 const vector<Trigram>& index,
163 const Time& time) {
164 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
165 FileId file_id = GetFileId(file_path);
166 vector<Trigram>::const_iterator it = index.begin();
167 for (; it != index.end(); ++it) {
168 Trigram trigram = *it;
169 index_[trigram].push_back(file_id);
170 is_normalized_[trigram] = false;
172 index_times_[file_path] = time;
175 vector<FilePath> Index::Search(string query) {
176 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
177 const char* data = query.c_str();
178 vector<TrigramChar> trigram_chars;
179 trigram_chars.reserve(query.size());
180 for (size_t i = 0; i < query.size(); ++i)
181 trigram_chars.push_back(TrigramCharForChar(data[i]));
182 vector<Trigram> trigrams;
183 for (size_t i = 0; i + 2 < query.size(); ++i) {
184 Trigram trigram = TrigramAtIndex(trigram_chars, i);
185 if (trigram != kUndefinedTrigram)
186 trigrams.push_back(trigram);
188 set<FileId> file_ids;
189 bool first = true;
190 vector<Trigram>::const_iterator it = trigrams.begin();
191 for (; it != trigrams.end(); ++it) {
192 Trigram trigram = *it;
193 if (first) {
194 std::copy(index_[trigram].begin(),
195 index_[trigram].end(),
196 std::inserter(file_ids, file_ids.begin()));
197 first = false;
198 continue;
200 set<FileId> intersection;
201 std::set_intersection(file_ids.begin(),
202 file_ids.end(),
203 index_[trigram].begin(),
204 index_[trigram].end(),
205 std::inserter(intersection, intersection.begin()));
206 file_ids.swap(intersection);
208 vector<FilePath> result;
209 FileIdsMap::const_iterator ids_it = file_ids_.begin();
210 for (; ids_it != file_ids_.end(); ++ids_it) {
211 if (trigrams.size() == 0 ||
212 file_ids.find(ids_it->second) != file_ids.end()) {
213 result.push_back(ids_it->first);
216 return result;
219 FileId Index::GetFileId(const FilePath& file_path) {
220 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
221 string file_path_str = file_path.AsUTF8Unsafe();
222 if (file_ids_.find(file_path) != file_ids_.end())
223 return file_ids_[file_path];
224 file_ids_[file_path] = ++last_file_id_;
225 return last_file_id_;
228 void Index::NormalizeVectors() {
229 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
230 for (size_t i = 0; i < kTrigramCount; ++i) {
231 if (!is_normalized_[i]) {
232 std::sort(index_[i].begin(), index_[i].end());
233 if (index_[i].capacity() > index_[i].size())
234 vector<FileId>(index_[i]).swap(index_[i]);
235 is_normalized_[i] = true;
240 void Index::PrintStats() {
241 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
242 LOG(ERROR) << "Index stats:";
243 size_t size = 0;
244 size_t maxSize = 0;
245 size_t capacity = 0;
246 for (size_t i = 0; i < kTrigramCount; ++i) {
247 if (index_[i].size() > maxSize)
248 maxSize = index_[i].size();
249 size += index_[i].size();
250 capacity += index_[i].capacity();
252 LOG(ERROR) << " - total trigram count: " << size;
253 LOG(ERROR) << " - max file count per trigram: " << maxSize;
254 LOG(ERROR) << " - total vectors capacity " << capacity;
255 size_t total_index_size =
256 capacity * sizeof(FileId) + sizeof(vector<FileId>) * kTrigramCount;
257 LOG(ERROR) << " - estimated total index size " << total_index_size;
260 typedef Callback<void(bool, const vector<bool>&)> IndexerCallback;
262 } // namespace
264 DevToolsFileSystemIndexer::FileSystemIndexingJob::FileSystemIndexingJob(
265 const FilePath& file_system_path,
266 const TotalWorkCallback& total_work_callback,
267 const WorkedCallback& worked_callback,
268 const DoneCallback& done_callback)
269 : file_system_path_(file_system_path),
270 total_work_callback_(total_work_callback),
271 worked_callback_(worked_callback),
272 done_callback_(done_callback),
273 files_indexed_(0),
274 stopped_(false) {
275 current_trigrams_set_.resize(kTrigramCount);
276 current_trigrams_.reserve(kTrigramCount);
279 DevToolsFileSystemIndexer::FileSystemIndexingJob::~FileSystemIndexingJob() {}
281 void DevToolsFileSystemIndexer::FileSystemIndexingJob::Start() {
282 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
283 BrowserThread::PostTask(
284 BrowserThread::FILE,
285 FROM_HERE,
286 Bind(&FileSystemIndexingJob::CollectFilesToIndex, this));
289 void DevToolsFileSystemIndexer::FileSystemIndexingJob::Stop() {
290 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
291 BrowserThread::PostTask(BrowserThread::FILE,
292 FROM_HERE,
293 Bind(&FileSystemIndexingJob::StopOnFileThread, this));
296 void DevToolsFileSystemIndexer::FileSystemIndexingJob::StopOnFileThread() {
297 stopped_ = true;
300 void DevToolsFileSystemIndexer::FileSystemIndexingJob::CollectFilesToIndex() {
301 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
302 if (stopped_)
303 return;
304 if (!file_enumerator_) {
305 file_enumerator_.reset(
306 new FileEnumerator(file_system_path_, true, FileEnumerator::FILES));
308 FilePath file_path = file_enumerator_->Next();
309 if (file_path.empty()) {
310 BrowserThread::PostTask(
311 BrowserThread::UI,
312 FROM_HERE,
313 Bind(total_work_callback_, file_path_times_.size()));
314 indexing_it_ = file_path_times_.begin();
315 IndexFiles();
316 return;
318 Time saved_last_modified_time =
319 g_trigram_index.Get().LastModifiedTimeForFile(file_path);
320 FileEnumerator::FileInfo file_info = file_enumerator_->GetInfo();
321 Time current_last_modified_time = file_info.GetLastModifiedTime();
322 if (current_last_modified_time > saved_last_modified_time) {
323 file_path_times_[file_path] = current_last_modified_time;
325 BrowserThread::PostTask(
326 BrowserThread::FILE,
327 FROM_HERE,
328 Bind(&FileSystemIndexingJob::CollectFilesToIndex, this));
331 void DevToolsFileSystemIndexer::FileSystemIndexingJob::IndexFiles() {
332 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
333 if (stopped_)
334 return;
335 if (indexing_it_ == file_path_times_.end()) {
336 g_trigram_index.Get().NormalizeVectors();
337 BrowserThread::PostTask(BrowserThread::UI, FROM_HERE, done_callback_);
338 return;
340 FilePath file_path = indexing_it_->first;
341 FileUtilProxy::CreateOrOpen(
342 BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE).get(),
343 file_path,
344 base::PLATFORM_FILE_OPEN | base::PLATFORM_FILE_READ,
345 Bind(&FileSystemIndexingJob::StartFileIndexing, this));
348 void DevToolsFileSystemIndexer::FileSystemIndexingJob::StartFileIndexing(
349 PlatformFileError error,
350 PassPlatformFile pass_file,
351 bool) {
352 if (error != base::PLATFORM_FILE_OK) {
353 current_file_ = base::kInvalidPlatformFileValue;
354 FinishFileIndexing(false);
355 return;
357 current_file_ = pass_file.ReleaseValue();
358 current_file_offset_ = 0;
359 current_trigrams_.clear();
360 std::fill(current_trigrams_set_.begin(), current_trigrams_set_.end(), false);
361 ReadFromFile();
364 void DevToolsFileSystemIndexer::FileSystemIndexingJob::ReadFromFile() {
365 if (stopped_) {
366 CloseFile();
367 return;
369 FileUtilProxy::Read(
370 BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE).get(),
371 current_file_,
372 current_file_offset_,
373 kMaxReadLength,
374 Bind(&FileSystemIndexingJob::OnRead, this));
377 void DevToolsFileSystemIndexer::FileSystemIndexingJob::OnRead(
378 PlatformFileError error,
379 const char* data,
380 int bytes_read) {
381 if (error != base::PLATFORM_FILE_OK) {
382 FinishFileIndexing(false);
383 return;
386 if (!bytes_read || bytes_read < 3) {
387 FinishFileIndexing(true);
388 return;
391 size_t size = static_cast<size_t>(bytes_read);
392 vector<TrigramChar> trigram_chars;
393 trigram_chars.reserve(size);
394 for (size_t i = 0; i < size; ++i) {
395 if (IsBinaryChar(data[i])) {
396 current_trigrams_.clear();
397 FinishFileIndexing(true);
398 return;
400 trigram_chars.push_back(TrigramCharForChar(data[i]));
403 for (size_t i = 0; i + 2 < size; ++i) {
404 Trigram trigram = TrigramAtIndex(trigram_chars, i);
405 if ((trigram != kUndefinedTrigram) && !current_trigrams_set_[trigram]) {
406 current_trigrams_set_[trigram] = true;
407 current_trigrams_.push_back(trigram);
410 current_file_offset_ += bytes_read - 2;
411 ReadFromFile();
414 void DevToolsFileSystemIndexer::FileSystemIndexingJob::FinishFileIndexing(
415 bool success) {
416 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
417 CloseFile();
418 if (success) {
419 FilePath file_path = indexing_it_->first;
420 g_trigram_index.Get().SetTrigramsForFile(
421 file_path, current_trigrams_, file_path_times_[file_path]);
423 ReportWorked();
424 ++indexing_it_;
425 IndexFiles();
428 void DevToolsFileSystemIndexer::FileSystemIndexingJob::CloseFile() {
429 if (current_file_ != base::kInvalidPlatformFileValue) {
430 FileUtilProxy::Close(
431 BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE).get(),
432 current_file_,
433 Bind(&FileSystemIndexingJob::CloseCallback, this));
437 void DevToolsFileSystemIndexer::FileSystemIndexingJob::CloseCallback(
438 PlatformFileError error) {}
440 void DevToolsFileSystemIndexer::FileSystemIndexingJob::ReportWorked() {
441 TimeTicks current_time = TimeTicks::Now();
442 bool should_send_worked_nitification = true;
443 if (!last_worked_notification_time_.is_null()) {
444 TimeDelta delta = current_time - last_worked_notification_time_;
445 if (delta.InMilliseconds() < kMinTimeoutBetweenWorkedNitification)
446 should_send_worked_nitification = false;
448 ++files_indexed_;
449 if (should_send_worked_nitification) {
450 last_worked_notification_time_ = current_time;
451 BrowserThread::PostTask(
452 BrowserThread::UI, FROM_HERE, Bind(worked_callback_, files_indexed_));
453 files_indexed_ = 0;
457 DevToolsFileSystemIndexer::DevToolsFileSystemIndexer() {
458 static bool maps_initialized = false;
459 if (!maps_initialized) {
460 InitIsBinaryCharMap();
461 InitTrigramCharsMap();
462 maps_initialized = true;
466 DevToolsFileSystemIndexer::~DevToolsFileSystemIndexer() {}
468 scoped_refptr<DevToolsFileSystemIndexer::FileSystemIndexingJob>
469 DevToolsFileSystemIndexer::IndexPath(
470 const string& file_system_path,
471 const TotalWorkCallback& total_work_callback,
472 const WorkedCallback& worked_callback,
473 const DoneCallback& done_callback) {
474 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
475 scoped_refptr<FileSystemIndexingJob> indexing_job =
476 new FileSystemIndexingJob(FilePath::FromUTF8Unsafe(file_system_path),
477 total_work_callback,
478 worked_callback,
479 done_callback);
480 indexing_job->Start();
481 return indexing_job;
484 void DevToolsFileSystemIndexer::SearchInPath(const string& file_system_path,
485 const string& query,
486 const SearchCallback& callback) {
487 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
488 BrowserThread::PostTask(
489 BrowserThread::FILE,
490 FROM_HERE,
491 Bind(&DevToolsFileSystemIndexer::SearchInPathOnFileThread,
492 this,
493 file_system_path,
494 query,
495 callback));
498 void DevToolsFileSystemIndexer::SearchInPathOnFileThread(
499 const string& file_system_path,
500 const string& query,
501 const SearchCallback& callback) {
502 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
503 vector<FilePath> file_paths = g_trigram_index.Get().Search(query);
504 vector<string> result;
505 FilePath path = FilePath::FromUTF8Unsafe(file_system_path);
506 vector<FilePath>::const_iterator it = file_paths.begin();
507 for (; it != file_paths.end(); ++it) {
508 if (path.IsParent(*it))
509 result.push_back(it->AsUTF8Unsafe());
511 BrowserThread::PostTask(BrowserThread::UI, FROM_HERE, Bind(callback, result));