Linux: Depend on liberation-fonts package for RPMs.
[chromium-blink-merge.git] / components / drive / search_metadata.cc
blobfcc35313fa2781e211cf67bdebc125e0ff3ecf8d
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/drive/search_metadata.h"
7 #include <algorithm>
8 #include <queue>
10 #include "base/bind.h"
11 #include "base/i18n/string_search.h"
12 #include "base/metrics/histogram.h"
13 #include "base/strings/string_piece.h"
14 #include "base/strings/string_split.h"
15 #include "base/strings/string_util.h"
16 #include "base/strings/utf_string_conversions.h"
17 #include "base/time/time.h"
18 #include "components/drive/drive_api_util.h"
19 #include "components/drive/file_system_core_util.h"
20 #include "net/base/escape.h"
22 namespace drive {
23 namespace internal {
25 namespace {
27 struct ResultCandidate {
28 ResultCandidate(const std::string& local_id,
29 const ResourceEntry& entry,
30 const std::string& highlighted_base_name)
31 : local_id(local_id),
32 entry(entry),
33 highlighted_base_name(highlighted_base_name) {
36 std::string local_id;
37 ResourceEntry entry;
38 std::string highlighted_base_name;
41 // Used to sort the result candidates per the last accessed/modified time. The
42 // recently accessed/modified files come first.
43 bool CompareByTimestamp(const ResourceEntry& a, const ResourceEntry& b) {
44 const PlatformFileInfoProto& a_file_info = a.file_info();
45 const PlatformFileInfoProto& b_file_info = b.file_info();
47 if (a_file_info.last_accessed() != b_file_info.last_accessed())
48 return a_file_info.last_accessed() > b_file_info.last_accessed();
50 // When the entries have the same last access time (which happens quite often
51 // because Drive server doesn't set the field until an entry is viewed via
52 // drive.google.com), we use last modified time as the tie breaker.
53 return a_file_info.last_modified() > b_file_info.last_modified();
56 struct ResultCandidateComparator {
57 bool operator()(const ResultCandidate* a, const ResultCandidate* b) const {
58 return CompareByTimestamp(a->entry, b->entry);
62 // A wrapper of std::priority_queue which deals with pointers of values.
63 template<typename T, typename Compare>
64 class ScopedPriorityQueue {
65 public:
66 ScopedPriorityQueue() {}
68 ~ScopedPriorityQueue() {
69 while (!empty())
70 pop();
73 bool empty() const { return queue_.empty(); }
75 size_t size() const { return queue_.size(); }
77 const T* top() const { return queue_.top(); }
79 void push(T* x) { queue_.push(x); }
81 void pop() {
82 // Keep top alive for the pop() call so that debug checks can access
83 // underlying data (e.g. validating heap property of the priority queue
84 // will call the comparator).
85 T* saved_top = queue_.top();
86 queue_.pop();
87 delete saved_top;
90 private:
91 std::priority_queue<T*, std::vector<T*>, Compare> queue_;
93 DISALLOW_COPY_AND_ASSIGN(ScopedPriorityQueue);
96 // Classifies the given entry as hidden if it's not under specific directories.
97 class HiddenEntryClassifier {
98 public:
99 HiddenEntryClassifier(ResourceMetadata* metadata,
100 const std::string& mydrive_local_id)
101 : metadata_(metadata) {
102 // Only things under My Drive and drive/other are not hidden.
103 is_hiding_child_[mydrive_local_id] = false;
104 is_hiding_child_[util::kDriveOtherDirLocalId] = false;
106 // Everything else is hidden, including the directories mentioned above
107 // themselves.
108 is_hiding_child_[""] = true;
111 // |result| is set to true if |entry| is hidden.
112 FileError IsHidden(const ResourceEntry& entry, bool* result) {
113 // Look up for parents recursively.
114 std::vector<std::string> undetermined_ids;
115 undetermined_ids.push_back(entry.parent_local_id());
117 std::map<std::string, bool>::iterator it =
118 is_hiding_child_.find(undetermined_ids.back());
119 for (; it == is_hiding_child_.end();
120 it = is_hiding_child_.find(undetermined_ids.back())) {
121 ResourceEntry parent;
122 FileError error =
123 metadata_->GetResourceEntryById(undetermined_ids.back(), &parent);
124 if (error != FILE_ERROR_OK)
125 return error;
126 undetermined_ids.push_back(parent.parent_local_id());
129 // Cache the result.
130 undetermined_ids.pop_back(); // The last one is already in the map.
131 for (size_t i = 0; i < undetermined_ids.size(); ++i)
132 is_hiding_child_[undetermined_ids[i]] = it->second;
134 *result = it->second;
135 return FILE_ERROR_OK;
138 private:
139 ResourceMetadata* metadata_;
141 // local ID to is_hidden map.
142 std::map<std::string, bool> is_hiding_child_;
145 // Used to implement SearchMetadata.
146 // Adds entry to the result when appropriate.
147 // In particular, if size of |queries| is larger than 0, only adds files with
148 // the name matching the query.
149 FileError MaybeAddEntryToResult(
150 ResourceMetadata* resource_metadata,
151 ResourceMetadata::Iterator* it,
152 const ScopedVector<
153 base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents>& queries,
154 const SearchMetadataPredicate& predicate,
155 size_t at_most_num_matches,
156 HiddenEntryClassifier* hidden_entry_classifier,
157 ScopedPriorityQueue<ResultCandidate, ResultCandidateComparator>*
158 result_candidates) {
159 DCHECK_GE(at_most_num_matches, result_candidates->size());
161 const ResourceEntry& entry = it->GetValue();
163 // If the candidate set is already full, and this |entry| is old, do nothing.
164 // We perform this check first in order to avoid the costly find-and-highlight
165 // or FilePath lookup as much as possible.
166 if (result_candidates->size() == at_most_num_matches &&
167 !CompareByTimestamp(entry, result_candidates->top()->entry))
168 return FILE_ERROR_OK;
170 // Add |entry| to the result if the entry is eligible for the given
171 // |options| and matches the query. The base name of the entry must
172 // contain |query| to match the query.
173 std::string highlighted;
174 if (!predicate.Run(entry) ||
175 !FindAndHighlight(entry.base_name(), queries, &highlighted))
176 return FILE_ERROR_OK;
178 // Hidden entry should not be returned.
179 bool hidden = false;
180 FileError error = hidden_entry_classifier->IsHidden(entry, &hidden);
181 if (error != FILE_ERROR_OK || hidden)
182 return error;
184 // Make space for |entry| when appropriate.
185 if (result_candidates->size() == at_most_num_matches)
186 result_candidates->pop();
187 result_candidates->push(new ResultCandidate(it->GetID(), entry, highlighted));
188 return FILE_ERROR_OK;
191 // Implements SearchMetadata().
192 FileError SearchMetadataOnBlockingPool(ResourceMetadata* resource_metadata,
193 const std::string& query_text,
194 const SearchMetadataPredicate& predicate,
195 int at_most_num_matches,
196 MetadataSearchResultVector* results) {
197 ScopedPriorityQueue<ResultCandidate,
198 ResultCandidateComparator> result_candidates;
200 // Prepare data structure for searching.
201 std::vector<base::string16> keywords =
202 base::SplitString(base::UTF8ToUTF16(query_text),
203 base::StringPiece16(base::kWhitespaceUTF16),
204 base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
206 ScopedVector<base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents>
207 queries;
208 for (const auto& keyword : keywords) {
209 queries.push_back(
210 new base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents(
211 keyword));
214 // Prepare an object to filter out hidden entries.
215 ResourceEntry mydrive;
216 FileError error = resource_metadata->GetResourceEntryByPath(
217 util::GetDriveMyDriveRootPath(), &mydrive);
218 if (error != FILE_ERROR_OK)
219 return error;
220 HiddenEntryClassifier hidden_entry_classifier(resource_metadata,
221 mydrive.local_id());
223 // Iterate over entries.
224 scoped_ptr<ResourceMetadata::Iterator> it = resource_metadata->GetIterator();
225 for (; !it->IsAtEnd(); it->Advance()) {
226 FileError error = MaybeAddEntryToResult(
227 resource_metadata, it.get(), queries, predicate, at_most_num_matches,
228 &hidden_entry_classifier, &result_candidates);
229 if (error != FILE_ERROR_OK)
230 return error;
233 // Prepare the result.
234 for (; !result_candidates.empty(); result_candidates.pop()) {
235 const ResultCandidate& candidate = *result_candidates.top();
236 // The path field of entries in result_candidates are empty at this point,
237 // because we don't want to run the expensive metadata DB look up except for
238 // the final results. Hence, here we fill the part.
239 base::FilePath path;
240 error = resource_metadata->GetFilePath(candidate.local_id, &path);
241 if (error != FILE_ERROR_OK)
242 return error;
243 bool is_directory = candidate.entry.file_info().is_directory();
244 results->push_back(MetadataSearchResult(
245 path, is_directory, candidate.highlighted_base_name,
246 candidate.entry.file_specific_info().md5()));
249 // Reverse the order here because |result_candidates| puts the most
250 // uninteresting candidate at the top.
251 std::reverse(results->begin(), results->end());
253 return FILE_ERROR_OK;
256 // Runs the SearchMetadataCallback and updates the histogram.
257 void RunSearchMetadataCallback(const SearchMetadataCallback& callback,
258 const base::TimeTicks& start_time,
259 scoped_ptr<MetadataSearchResultVector> results,
260 FileError error) {
261 if (error != FILE_ERROR_OK)
262 results.reset();
263 callback.Run(error, results.Pass());
265 UMA_HISTOGRAM_TIMES("Drive.SearchMetadataTime",
266 base::TimeTicks::Now() - start_time);
269 // Appends substring of |original_text| to |highlighted_text| with highlight.
270 void AppendStringWithHighlight(const base::string16& original_text,
271 size_t start,
272 size_t length,
273 bool highlight,
274 std::string* highlighted_text) {
275 if (highlight)
276 highlighted_text->append("<b>");
278 highlighted_text->append(net::EscapeForHTML(
279 base::UTF16ToUTF8(original_text.substr(start, length))));
281 if (highlight)
282 highlighted_text->append("</b>");
285 } // namespace
287 void SearchMetadata(
288 scoped_refptr<base::SequencedTaskRunner> blocking_task_runner,
289 ResourceMetadata* resource_metadata,
290 const std::string& query,
291 const SearchMetadataPredicate& predicate,
292 size_t at_most_num_matches,
293 const SearchMetadataCallback& callback) {
294 DCHECK(!callback.is_null());
296 const base::TimeTicks start_time = base::TimeTicks::Now();
298 scoped_ptr<MetadataSearchResultVector> results(
299 new MetadataSearchResultVector);
300 MetadataSearchResultVector* results_ptr = results.get();
301 base::PostTaskAndReplyWithResult(
302 blocking_task_runner.get(), FROM_HERE,
303 base::Bind(&SearchMetadataOnBlockingPool, resource_metadata, query,
304 predicate, at_most_num_matches, results_ptr),
305 base::Bind(&RunSearchMetadataCallback, callback, start_time,
306 base::Passed(&results)));
309 bool MatchesType(int options, const ResourceEntry& entry) {
310 if ((options & SEARCH_METADATA_EXCLUDE_HOSTED_DOCUMENTS) &&
311 entry.file_specific_info().is_hosted_document())
312 return false;
314 if ((options & SEARCH_METADATA_EXCLUDE_DIRECTORIES) &&
315 entry.file_info().is_directory())
316 return false;
318 if (options & SEARCH_METADATA_SHARED_WITH_ME)
319 return entry.shared_with_me();
321 if (options & SEARCH_METADATA_OFFLINE) {
322 if (entry.file_specific_info().is_hosted_document()) {
323 // Not all hosted documents are cached by Drive offline app.
324 // https://support.google.com/drive/answer/2375012
325 std::string mime_type = entry.file_specific_info().content_mime_type();
326 return mime_type == drive::util::kGoogleDocumentMimeType ||
327 mime_type == drive::util::kGoogleSpreadsheetMimeType ||
328 mime_type == drive::util::kGooglePresentationMimeType ||
329 mime_type == drive::util::kGoogleDrawingMimeType;
330 } else {
331 return entry.file_specific_info().cache_state().is_present();
335 return true;
338 bool FindAndHighlight(
339 const std::string& text,
340 const ScopedVector<
341 base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents>& queries,
342 std::string* highlighted_text) {
343 DCHECK(highlighted_text);
344 highlighted_text->clear();
346 // Check text matches with all queries.
347 size_t match_start = 0;
348 size_t match_length = 0;
350 base::string16 text16 = base::UTF8ToUTF16(text);
351 std::vector<bool> highlights(text16.size(), false);
352 for (auto* query : queries) {
353 if (!query->Search(text16, &match_start, &match_length))
354 return false;
356 std::fill(highlights.begin() + match_start,
357 highlights.begin() + match_start + match_length, true);
360 // Generate highlighted text.
361 size_t start_current_segment = 0;
363 for (size_t i = 0; i < text16.size(); ++i) {
364 if (highlights[start_current_segment] == highlights[i])
365 continue;
367 AppendStringWithHighlight(
368 text16, start_current_segment, i - start_current_segment,
369 highlights[start_current_segment], highlighted_text);
371 start_current_segment = i;
374 DCHECK_GE(text16.size(), start_current_segment);
375 AppendStringWithHighlight(
376 text16, start_current_segment, text16.size() - start_current_segment,
377 highlights[start_current_segment], highlighted_text);
379 return true;
382 } // namespace internal
383 } // namespace drive