cros: Update SAML flow.
[chromium-blink-merge.git] / chrome / browser / devtools / devtools_file_system_indexer.cc
blob9a281db855c615623c936319eb8a2d27c6ea1522
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/devtools/devtools_file_system_indexer.h"
7 #include <iterator>
9 #include "base/bind.h"
10 #include "base/callback.h"
11 #include "base/file_util.h"
12 #include "base/files/file_enumerator.h"
13 #include "base/files/file_util_proxy.h"
14 #include "base/lazy_instance.h"
15 #include "base/logging.h"
16 #include "base/platform_file.h"
17 #include "base/strings/utf_string_conversions.h"
18 #include "content/public/browser/browser_thread.h"
20 using base::Bind;
21 using base::Callback;
22 using base::FileEnumerator;
23 using base::FilePath;
24 using base::FileUtilProxy;
25 using base::Time;
26 using base::TimeDelta;
27 using base::TimeTicks;
28 using base::PassPlatformFile;
29 using base::PlatformFile;
30 using content::BrowserThread;
31 using std::map;
32 using std::set;
33 using std::string;
34 using std::vector;
36 namespace {
38 typedef int32 Trigram;
39 typedef char TrigramChar;
40 typedef uint16 FileId;
42 const int kMinTimeoutBetweenWorkedNitification = 200;
43 // Trigram characters include all ASCII printable characters (32-126) except for
44 // the capital letters, because the index is case insensitive.
45 const size_t kTrigramCharacterCount = 126 - 'Z' - 1 + 'A' - ' ' + 1;
46 const size_t kTrigramCount =
47 kTrigramCharacterCount * kTrigramCharacterCount * kTrigramCharacterCount;
48 const int kMaxReadLength = 10 * 1024;
49 const TrigramChar kUndefinedTrigramChar = -1;
50 const Trigram kUndefinedTrigram = -1;
52 base::LazyInstance<vector<bool> >::Leaky g_is_binary_char =
53 LAZY_INSTANCE_INITIALIZER;
55 base::LazyInstance<vector<TrigramChar> >::Leaky g_trigram_chars =
56 LAZY_INSTANCE_INITIALIZER;
58 class Index {
59 public:
60 Index();
61 Time LastModifiedTimeForFile(const FilePath& file_path);
62 void SetTrigramsForFile(const FilePath& file_path,
63 const vector<Trigram>& index,
64 const Time& time);
65 vector<FilePath> Search(string query);
66 void PrintStats();
67 void NormalizeVectors();
69 private:
70 ~Index();
72 FileId GetFileId(const FilePath& file_path);
74 typedef map<FilePath, FileId> FileIdsMap;
75 FileIdsMap file_ids_;
76 FileId last_file_id_;
77 // The index in this vector is the trigram id.
78 vector<vector<FileId> > index_;
79 typedef map<FilePath, Time> IndexedFilesMap;
80 IndexedFilesMap index_times_;
81 vector<bool> is_normalized_;
83 DISALLOW_COPY_AND_ASSIGN(Index);
86 base::LazyInstance<Index>::Leaky g_trigram_index = LAZY_INSTANCE_INITIALIZER;
88 void InitIsBinaryCharMap() {
89 for (size_t i = 0; i < 256; ++i) {
90 unsigned char ch = i;
91 bool is_binary_char = ch < 9 || (ch >= 14 && ch < 32) || ch == 127;
92 g_is_binary_char.Get().push_back(is_binary_char);
96 bool IsBinaryChar(char c) {
97 unsigned char uc = static_cast<unsigned char>(c);
98 return g_is_binary_char.Get()[uc];
101 void InitTrigramCharsMap() {
102 for (size_t i = 0; i < 256; ++i) {
103 if (i > 127) {
104 g_trigram_chars.Get().push_back(kUndefinedTrigramChar);
105 continue;
107 char ch = i;
108 if (ch == '\t')
109 ch = ' ';
110 if (ch >= 'A' && ch <= 'Z')
111 ch = ch - 'A' + 'a';
112 if ((IsBinaryChar(ch)) || (ch < ' ')) {
113 g_trigram_chars.Get().push_back(kUndefinedTrigramChar);
114 continue;
117 if (ch >= 'Z')
118 ch = ch - 'Z' - 1 + 'A';
119 ch -= ' ';
120 char signed_trigram_char_count = static_cast<char>(kTrigramCharacterCount);
121 CHECK(ch >= 0 && ch < signed_trigram_char_count);
122 g_trigram_chars.Get().push_back(ch);
126 TrigramChar TrigramCharForChar(char c) {
127 unsigned char uc = static_cast<unsigned char>(c);
128 return g_trigram_chars.Get()[uc];
131 Trigram TrigramAtIndex(const vector<TrigramChar>& trigram_chars, size_t index) {
132 static int kTrigramCharacterCountSquared =
133 kTrigramCharacterCount * kTrigramCharacterCount;
134 if (trigram_chars[index] == kUndefinedTrigramChar ||
135 trigram_chars[index + 1] == kUndefinedTrigramChar ||
136 trigram_chars[index + 2] == kUndefinedTrigramChar)
137 return kUndefinedTrigram;
138 Trigram trigram = kTrigramCharacterCountSquared * trigram_chars[index] +
139 kTrigramCharacterCount * trigram_chars[index + 1] +
140 trigram_chars[index + 2];
141 return trigram;
144 Index::Index() : last_file_id_(0) {
145 index_.resize(kTrigramCount);
146 is_normalized_.resize(kTrigramCount);
147 std::fill(is_normalized_.begin(), is_normalized_.end(), true);
150 Index::~Index() {}
152 Time Index::LastModifiedTimeForFile(const FilePath& file_path) {
153 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
154 Time last_modified_time;
155 if (index_times_.find(file_path) != index_times_.end())
156 last_modified_time = index_times_[file_path];
157 return last_modified_time;
160 void Index::SetTrigramsForFile(const FilePath& file_path,
161 const vector<Trigram>& index,
162 const Time& time) {
163 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
164 FileId file_id = GetFileId(file_path);
165 vector<Trigram>::const_iterator it = index.begin();
166 for (; it != index.end(); ++it) {
167 Trigram trigram = *it;
168 index_[trigram].push_back(file_id);
169 is_normalized_[trigram] = false;
171 index_times_[file_path] = time;
174 vector<FilePath> Index::Search(string query) {
175 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
176 const char* data = query.c_str();
177 vector<TrigramChar> trigram_chars;
178 trigram_chars.reserve(query.size());
179 for (size_t i = 0; i < query.size(); ++i)
180 trigram_chars.push_back(TrigramCharForChar(data[i]));
181 vector<Trigram> trigrams;
182 for (size_t i = 0; i + 2 < query.size(); ++i) {
183 Trigram trigram = TrigramAtIndex(trigram_chars, i);
184 if (trigram != kUndefinedTrigram)
185 trigrams.push_back(trigram);
187 set<FileId> file_ids;
188 bool first = true;
189 vector<Trigram>::const_iterator it = trigrams.begin();
190 for (; it != trigrams.end(); ++it) {
191 Trigram trigram = *it;
192 if (first) {
193 std::copy(index_[trigram].begin(),
194 index_[trigram].end(),
195 std::inserter(file_ids, file_ids.begin()));
196 first = false;
197 continue;
199 set<FileId> intersection;
200 std::set_intersection(file_ids.begin(),
201 file_ids.end(),
202 index_[trigram].begin(),
203 index_[trigram].end(),
204 std::inserter(intersection, intersection.begin()));
205 file_ids.swap(intersection);
207 vector<FilePath> result;
208 FileIdsMap::const_iterator ids_it = file_ids_.begin();
209 for (; ids_it != file_ids_.end(); ++ids_it) {
210 if (trigrams.size() == 0 ||
211 file_ids.find(ids_it->second) != file_ids.end()) {
212 result.push_back(ids_it->first);
215 return result;
218 FileId Index::GetFileId(const FilePath& file_path) {
219 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
220 string file_path_str = file_path.AsUTF8Unsafe();
221 if (file_ids_.find(file_path) != file_ids_.end())
222 return file_ids_[file_path];
223 file_ids_[file_path] = ++last_file_id_;
224 return last_file_id_;
227 void Index::NormalizeVectors() {
228 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
229 for (size_t i = 0; i < kTrigramCount; ++i) {
230 if (!is_normalized_[i]) {
231 std::sort(index_[i].begin(), index_[i].end());
232 if (index_[i].capacity() > index_[i].size())
233 vector<FileId>(index_[i]).swap(index_[i]);
234 is_normalized_[i] = true;
239 void Index::PrintStats() {
240 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
241 LOG(ERROR) << "Index stats:";
242 size_t size = 0;
243 size_t maxSize = 0;
244 size_t capacity = 0;
245 for (size_t i = 0; i < kTrigramCount; ++i) {
246 if (index_[i].size() > maxSize)
247 maxSize = index_[i].size();
248 size += index_[i].size();
249 capacity += index_[i].capacity();
251 LOG(ERROR) << " - total trigram count: " << size;
252 LOG(ERROR) << " - max file count per trigram: " << maxSize;
253 LOG(ERROR) << " - total vectors capacity " << capacity;
254 size_t total_index_size =
255 capacity * sizeof(FileId) + sizeof(vector<FileId>) * kTrigramCount;
256 LOG(ERROR) << " - estimated total index size " << total_index_size;
259 typedef Callback<void(bool, const vector<bool>&)> IndexerCallback;
261 } // namespace
263 DevToolsFileSystemIndexer::FileSystemIndexingJob::FileSystemIndexingJob(
264 const FilePath& file_system_path,
265 const TotalWorkCallback& total_work_callback,
266 const WorkedCallback& worked_callback,
267 const DoneCallback& done_callback)
268 : file_system_path_(file_system_path),
269 total_work_callback_(total_work_callback),
270 worked_callback_(worked_callback),
271 done_callback_(done_callback),
272 files_indexed_(0),
273 stopped_(false) {
274 current_trigrams_set_.resize(kTrigramCount);
275 current_trigrams_.reserve(kTrigramCount);
278 DevToolsFileSystemIndexer::FileSystemIndexingJob::~FileSystemIndexingJob() {}
280 void DevToolsFileSystemIndexer::FileSystemIndexingJob::Start() {
281 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
282 BrowserThread::PostTask(
283 BrowserThread::FILE,
284 FROM_HERE,
285 Bind(&FileSystemIndexingJob::CollectFilesToIndex, this));
288 void DevToolsFileSystemIndexer::FileSystemIndexingJob::Stop() {
289 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
290 BrowserThread::PostTask(BrowserThread::FILE,
291 FROM_HERE,
292 Bind(&FileSystemIndexingJob::StopOnFileThread, this));
295 void DevToolsFileSystemIndexer::FileSystemIndexingJob::StopOnFileThread() {
296 stopped_ = true;
299 void DevToolsFileSystemIndexer::FileSystemIndexingJob::CollectFilesToIndex() {
300 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
301 if (stopped_)
302 return;
303 if (!file_enumerator_) {
304 file_enumerator_.reset(
305 new FileEnumerator(file_system_path_, true, FileEnumerator::FILES));
307 FilePath file_path = file_enumerator_->Next();
308 if (file_path.empty()) {
309 BrowserThread::PostTask(
310 BrowserThread::UI,
311 FROM_HERE,
312 Bind(total_work_callback_, file_path_times_.size()));
313 indexing_it_ = file_path_times_.begin();
314 IndexFiles();
315 return;
317 Time saved_last_modified_time =
318 g_trigram_index.Get().LastModifiedTimeForFile(file_path);
319 FileEnumerator::FileInfo file_info = file_enumerator_->GetInfo();
320 Time current_last_modified_time = file_info.GetLastModifiedTime();
321 if (current_last_modified_time > saved_last_modified_time) {
322 file_path_times_[file_path] = current_last_modified_time;
324 BrowserThread::PostTask(
325 BrowserThread::FILE,
326 FROM_HERE,
327 Bind(&FileSystemIndexingJob::CollectFilesToIndex, this));
330 void DevToolsFileSystemIndexer::FileSystemIndexingJob::IndexFiles() {
331 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
332 if (stopped_)
333 return;
334 if (indexing_it_ == file_path_times_.end()) {
335 g_trigram_index.Get().NormalizeVectors();
336 BrowserThread::PostTask(BrowserThread::UI, FROM_HERE, done_callback_);
337 return;
339 FilePath file_path = indexing_it_->first;
340 FileUtilProxy::CreateOrOpen(
341 BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE).get(),
342 file_path,
343 base::PLATFORM_FILE_OPEN | base::PLATFORM_FILE_READ,
344 Bind(&FileSystemIndexingJob::StartFileIndexing, this));
347 void DevToolsFileSystemIndexer::FileSystemIndexingJob::StartFileIndexing(
348 base::File::Error error,
349 PassPlatformFile pass_file,
350 bool) {
351 if (error != base::File::FILE_OK) {
352 current_file_ = base::kInvalidPlatformFileValue;
353 FinishFileIndexing(false);
354 return;
356 current_file_ = pass_file.ReleaseValue();
357 current_file_offset_ = 0;
358 current_trigrams_.clear();
359 std::fill(current_trigrams_set_.begin(), current_trigrams_set_.end(), false);
360 ReadFromFile();
363 void DevToolsFileSystemIndexer::FileSystemIndexingJob::ReadFromFile() {
364 if (stopped_) {
365 CloseFile();
366 return;
368 FileUtilProxy::Read(
369 BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE).get(),
370 current_file_,
371 current_file_offset_,
372 kMaxReadLength,
373 Bind(&FileSystemIndexingJob::OnRead, this));
376 void DevToolsFileSystemIndexer::FileSystemIndexingJob::OnRead(
377 base::File::Error error,
378 const char* data,
379 int bytes_read) {
380 if (error != base::File::FILE_OK) {
381 FinishFileIndexing(false);
382 return;
385 if (!bytes_read || bytes_read < 3) {
386 FinishFileIndexing(true);
387 return;
390 size_t size = static_cast<size_t>(bytes_read);
391 vector<TrigramChar> trigram_chars;
392 trigram_chars.reserve(size);
393 for (size_t i = 0; i < size; ++i) {
394 if (IsBinaryChar(data[i])) {
395 current_trigrams_.clear();
396 FinishFileIndexing(true);
397 return;
399 trigram_chars.push_back(TrigramCharForChar(data[i]));
402 for (size_t i = 0; i + 2 < size; ++i) {
403 Trigram trigram = TrigramAtIndex(trigram_chars, i);
404 if ((trigram != kUndefinedTrigram) && !current_trigrams_set_[trigram]) {
405 current_trigrams_set_[trigram] = true;
406 current_trigrams_.push_back(trigram);
409 current_file_offset_ += bytes_read - 2;
410 ReadFromFile();
413 void DevToolsFileSystemIndexer::FileSystemIndexingJob::FinishFileIndexing(
414 bool success) {
415 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
416 CloseFile();
417 if (success) {
418 FilePath file_path = indexing_it_->first;
419 g_trigram_index.Get().SetTrigramsForFile(
420 file_path, current_trigrams_, file_path_times_[file_path]);
422 ReportWorked();
423 ++indexing_it_;
424 IndexFiles();
427 void DevToolsFileSystemIndexer::FileSystemIndexingJob::CloseFile() {
428 if (current_file_ != base::kInvalidPlatformFileValue) {
429 FileUtilProxy::Close(
430 BrowserThread::GetMessageLoopProxyForThread(BrowserThread::FILE).get(),
431 current_file_,
432 Bind(&FileSystemIndexingJob::CloseCallback, this));
436 void DevToolsFileSystemIndexer::FileSystemIndexingJob::CloseCallback(
437 base::File::Error error) {}
439 void DevToolsFileSystemIndexer::FileSystemIndexingJob::ReportWorked() {
440 TimeTicks current_time = TimeTicks::Now();
441 bool should_send_worked_nitification = true;
442 if (!last_worked_notification_time_.is_null()) {
443 TimeDelta delta = current_time - last_worked_notification_time_;
444 if (delta.InMilliseconds() < kMinTimeoutBetweenWorkedNitification)
445 should_send_worked_nitification = false;
447 ++files_indexed_;
448 if (should_send_worked_nitification) {
449 last_worked_notification_time_ = current_time;
450 BrowserThread::PostTask(
451 BrowserThread::UI, FROM_HERE, Bind(worked_callback_, files_indexed_));
452 files_indexed_ = 0;
456 DevToolsFileSystemIndexer::DevToolsFileSystemIndexer() {
457 static bool maps_initialized = false;
458 if (!maps_initialized) {
459 InitIsBinaryCharMap();
460 InitTrigramCharsMap();
461 maps_initialized = true;
465 DevToolsFileSystemIndexer::~DevToolsFileSystemIndexer() {}
467 scoped_refptr<DevToolsFileSystemIndexer::FileSystemIndexingJob>
468 DevToolsFileSystemIndexer::IndexPath(
469 const string& file_system_path,
470 const TotalWorkCallback& total_work_callback,
471 const WorkedCallback& worked_callback,
472 const DoneCallback& done_callback) {
473 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
474 scoped_refptr<FileSystemIndexingJob> indexing_job =
475 new FileSystemIndexingJob(FilePath::FromUTF8Unsafe(file_system_path),
476 total_work_callback,
477 worked_callback,
478 done_callback);
479 indexing_job->Start();
480 return indexing_job;
483 void DevToolsFileSystemIndexer::SearchInPath(const string& file_system_path,
484 const string& query,
485 const SearchCallback& callback) {
486 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
487 BrowserThread::PostTask(
488 BrowserThread::FILE,
489 FROM_HERE,
490 Bind(&DevToolsFileSystemIndexer::SearchInPathOnFileThread,
491 this,
492 file_system_path,
493 query,
494 callback));
497 void DevToolsFileSystemIndexer::SearchInPathOnFileThread(
498 const string& file_system_path,
499 const string& query,
500 const SearchCallback& callback) {
501 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
502 vector<FilePath> file_paths = g_trigram_index.Get().Search(query);
503 vector<string> result;
504 FilePath path = FilePath::FromUTF8Unsafe(file_system_path);
505 vector<FilePath>::const_iterator it = file_paths.begin();
506 for (; it != file_paths.end(); ++it) {
507 if (path.IsParent(*it))
508 result.push_back(it->AsUTF8Unsafe());
510 BrowserThread::PostTask(BrowserThread::UI, FROM_HERE, Bind(callback, result));