1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "components/drive/search_metadata.h"
10 #include "base/bind.h"
11 #include "base/i18n/string_search.h"
12 #include "base/metrics/histogram.h"
13 #include "base/strings/string_piece.h"
14 #include "base/strings/string_split.h"
15 #include "base/strings/string_util.h"
16 #include "base/strings/utf_string_conversions.h"
17 #include "base/time/time.h"
18 #include "components/drive/drive_api_util.h"
19 #include "components/drive/file_system_core_util.h"
20 #include "net/base/escape.h"
27 struct ResultCandidate
{
28 ResultCandidate(const std::string
& local_id
,
29 const ResourceEntry
& entry
,
30 const std::string
& highlighted_base_name
)
33 highlighted_base_name(highlighted_base_name
) {
38 std::string highlighted_base_name
;
41 // Used to sort the result candidates per the last accessed/modified time. The
42 // recently accessed/modified files come first.
43 bool CompareByTimestamp(const ResourceEntry
& a
, const ResourceEntry
& b
) {
44 const PlatformFileInfoProto
& a_file_info
= a
.file_info();
45 const PlatformFileInfoProto
& b_file_info
= b
.file_info();
47 if (a_file_info
.last_accessed() != b_file_info
.last_accessed())
48 return a_file_info
.last_accessed() > b_file_info
.last_accessed();
50 // When the entries have the same last access time (which happens quite often
51 // because Drive server doesn't set the field until an entry is viewed via
52 // drive.google.com), we use last modified time as the tie breaker.
53 return a_file_info
.last_modified() > b_file_info
.last_modified();
56 struct ResultCandidateComparator
{
57 bool operator()(const ResultCandidate
* a
, const ResultCandidate
* b
) const {
58 return CompareByTimestamp(a
->entry
, b
->entry
);
62 // A wrapper of std::priority_queue which deals with pointers of values.
63 template<typename T
, typename Compare
>
64 class ScopedPriorityQueue
{
66 ScopedPriorityQueue() {}
68 ~ScopedPriorityQueue() {
73 bool empty() const { return queue_
.empty(); }
75 size_t size() const { return queue_
.size(); }
77 const T
* top() const { return queue_
.top(); }
79 void push(T
* x
) { queue_
.push(x
); }
82 // Keep top alive for the pop() call so that debug checks can access
83 // underlying data (e.g. validating heap property of the priority queue
84 // will call the comparator).
85 T
* saved_top
= queue_
.top();
91 std::priority_queue
<T
*, std::vector
<T
*>, Compare
> queue_
;
93 DISALLOW_COPY_AND_ASSIGN(ScopedPriorityQueue
);
96 // Classifies the given entry as hidden if it's not under specific directories.
97 class HiddenEntryClassifier
{
99 HiddenEntryClassifier(ResourceMetadata
* metadata
,
100 const std::string
& mydrive_local_id
)
101 : metadata_(metadata
) {
102 // Only things under My Drive and drive/other are not hidden.
103 is_hiding_child_
[mydrive_local_id
] = false;
104 is_hiding_child_
[util::kDriveOtherDirLocalId
] = false;
106 // Everything else is hidden, including the directories mentioned above
108 is_hiding_child_
[""] = true;
111 // |result| is set to true if |entry| is hidden.
112 FileError
IsHidden(const ResourceEntry
& entry
, bool* result
) {
113 // Look up for parents recursively.
114 std::vector
<std::string
> undetermined_ids
;
115 undetermined_ids
.push_back(entry
.parent_local_id());
117 std::map
<std::string
, bool>::iterator it
=
118 is_hiding_child_
.find(undetermined_ids
.back());
119 for (; it
== is_hiding_child_
.end();
120 it
= is_hiding_child_
.find(undetermined_ids
.back())) {
121 ResourceEntry parent
;
123 metadata_
->GetResourceEntryById(undetermined_ids
.back(), &parent
);
124 if (error
!= FILE_ERROR_OK
)
126 undetermined_ids
.push_back(parent
.parent_local_id());
130 undetermined_ids
.pop_back(); // The last one is already in the map.
131 for (size_t i
= 0; i
< undetermined_ids
.size(); ++i
)
132 is_hiding_child_
[undetermined_ids
[i
]] = it
->second
;
134 *result
= it
->second
;
135 return FILE_ERROR_OK
;
139 ResourceMetadata
* metadata_
;
141 // local ID to is_hidden map.
142 std::map
<std::string
, bool> is_hiding_child_
;
145 // Used to implement SearchMetadata.
146 // Adds entry to the result when appropriate.
147 // In particular, if size of |queries| is larger than 0, only adds files with
148 // the name matching the query.
149 FileError
MaybeAddEntryToResult(
150 ResourceMetadata
* resource_metadata
,
151 ResourceMetadata::Iterator
* it
,
153 base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents
>& queries
,
154 const SearchMetadataPredicate
& predicate
,
155 size_t at_most_num_matches
,
156 HiddenEntryClassifier
* hidden_entry_classifier
,
157 ScopedPriorityQueue
<ResultCandidate
, ResultCandidateComparator
>*
159 DCHECK_GE(at_most_num_matches
, result_candidates
->size());
161 const ResourceEntry
& entry
= it
->GetValue();
163 // If the candidate set is already full, and this |entry| is old, do nothing.
164 // We perform this check first in order to avoid the costly find-and-highlight
165 // or FilePath lookup as much as possible.
166 if (result_candidates
->size() == at_most_num_matches
&&
167 !CompareByTimestamp(entry
, result_candidates
->top()->entry
))
168 return FILE_ERROR_OK
;
170 // Add |entry| to the result if the entry is eligible for the given
171 // |options| and matches the query. The base name of the entry must
172 // contain |query| to match the query.
173 std::string highlighted
;
174 if (!predicate
.Run(entry
) ||
175 !FindAndHighlight(entry
.base_name(), queries
, &highlighted
))
176 return FILE_ERROR_OK
;
178 // Hidden entry should not be returned.
180 FileError error
= hidden_entry_classifier
->IsHidden(entry
, &hidden
);
181 if (error
!= FILE_ERROR_OK
|| hidden
)
184 // Make space for |entry| when appropriate.
185 if (result_candidates
->size() == at_most_num_matches
)
186 result_candidates
->pop();
187 result_candidates
->push(new ResultCandidate(it
->GetID(), entry
, highlighted
));
188 return FILE_ERROR_OK
;
191 // Implements SearchMetadata().
192 FileError
SearchMetadataOnBlockingPool(ResourceMetadata
* resource_metadata
,
193 const std::string
& query_text
,
194 const SearchMetadataPredicate
& predicate
,
195 int at_most_num_matches
,
196 MetadataSearchResultVector
* results
) {
197 ScopedPriorityQueue
<ResultCandidate
,
198 ResultCandidateComparator
> result_candidates
;
200 // Prepare data structure for searching.
201 std::vector
<base::string16
> keywords
=
202 base::SplitString(base::UTF8ToUTF16(query_text
),
203 base::StringPiece16(base::kWhitespaceUTF16
),
204 base::TRIM_WHITESPACE
, base::SPLIT_WANT_NONEMPTY
);
206 ScopedVector
<base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents
>
208 for (const auto& keyword
: keywords
) {
210 new base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents(
214 // Prepare an object to filter out hidden entries.
215 ResourceEntry mydrive
;
216 FileError error
= resource_metadata
->GetResourceEntryByPath(
217 util::GetDriveMyDriveRootPath(), &mydrive
);
218 if (error
!= FILE_ERROR_OK
)
220 HiddenEntryClassifier
hidden_entry_classifier(resource_metadata
,
223 // Iterate over entries.
224 scoped_ptr
<ResourceMetadata::Iterator
> it
= resource_metadata
->GetIterator();
225 for (; !it
->IsAtEnd(); it
->Advance()) {
226 FileError error
= MaybeAddEntryToResult(
227 resource_metadata
, it
.get(), queries
, predicate
, at_most_num_matches
,
228 &hidden_entry_classifier
, &result_candidates
);
229 if (error
!= FILE_ERROR_OK
)
233 // Prepare the result.
234 for (; !result_candidates
.empty(); result_candidates
.pop()) {
235 const ResultCandidate
& candidate
= *result_candidates
.top();
236 // The path field of entries in result_candidates are empty at this point,
237 // because we don't want to run the expensive metadata DB look up except for
238 // the final results. Hence, here we fill the part.
240 error
= resource_metadata
->GetFilePath(candidate
.local_id
, &path
);
241 if (error
!= FILE_ERROR_OK
)
243 bool is_directory
= candidate
.entry
.file_info().is_directory();
244 results
->push_back(MetadataSearchResult(
245 path
, is_directory
, candidate
.highlighted_base_name
,
246 candidate
.entry
.file_specific_info().md5()));
249 // Reverse the order here because |result_candidates| puts the most
250 // uninteresting candidate at the top.
251 std::reverse(results
->begin(), results
->end());
253 return FILE_ERROR_OK
;
256 // Runs the SearchMetadataCallback and updates the histogram.
257 void RunSearchMetadataCallback(const SearchMetadataCallback
& callback
,
258 const base::TimeTicks
& start_time
,
259 scoped_ptr
<MetadataSearchResultVector
> results
,
261 if (error
!= FILE_ERROR_OK
)
263 callback
.Run(error
, results
.Pass());
265 UMA_HISTOGRAM_TIMES("Drive.SearchMetadataTime",
266 base::TimeTicks::Now() - start_time
);
269 // Appends substring of |original_text| to |highlighted_text| with highlight.
270 void AppendStringWithHighlight(const base::string16
& original_text
,
274 std::string
* highlighted_text
) {
276 highlighted_text
->append("<b>");
278 highlighted_text
->append(net::EscapeForHTML(
279 base::UTF16ToUTF8(original_text
.substr(start
, length
))));
282 highlighted_text
->append("</b>");
288 scoped_refptr
<base::SequencedTaskRunner
> blocking_task_runner
,
289 ResourceMetadata
* resource_metadata
,
290 const std::string
& query
,
291 const SearchMetadataPredicate
& predicate
,
292 size_t at_most_num_matches
,
293 const SearchMetadataCallback
& callback
) {
294 DCHECK(!callback
.is_null());
296 const base::TimeTicks start_time
= base::TimeTicks::Now();
298 scoped_ptr
<MetadataSearchResultVector
> results(
299 new MetadataSearchResultVector
);
300 MetadataSearchResultVector
* results_ptr
= results
.get();
301 base::PostTaskAndReplyWithResult(
302 blocking_task_runner
.get(), FROM_HERE
,
303 base::Bind(&SearchMetadataOnBlockingPool
, resource_metadata
, query
,
304 predicate
, at_most_num_matches
, results_ptr
),
305 base::Bind(&RunSearchMetadataCallback
, callback
, start_time
,
306 base::Passed(&results
)));
309 bool MatchesType(int options
, const ResourceEntry
& entry
) {
310 if ((options
& SEARCH_METADATA_EXCLUDE_HOSTED_DOCUMENTS
) &&
311 entry
.file_specific_info().is_hosted_document())
314 if ((options
& SEARCH_METADATA_EXCLUDE_DIRECTORIES
) &&
315 entry
.file_info().is_directory())
318 if (options
& SEARCH_METADATA_SHARED_WITH_ME
)
319 return entry
.shared_with_me();
321 if (options
& SEARCH_METADATA_OFFLINE
) {
322 if (entry
.file_specific_info().is_hosted_document()) {
323 // Not all hosted documents are cached by Drive offline app.
324 // https://support.google.com/drive/answer/2375012
325 std::string mime_type
= entry
.file_specific_info().content_mime_type();
326 return mime_type
== drive::util::kGoogleDocumentMimeType
||
327 mime_type
== drive::util::kGoogleSpreadsheetMimeType
||
328 mime_type
== drive::util::kGooglePresentationMimeType
||
329 mime_type
== drive::util::kGoogleDrawingMimeType
;
331 return entry
.file_specific_info().cache_state().is_present();
338 bool FindAndHighlight(
339 const std::string
& text
,
341 base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents
>& queries
,
342 std::string
* highlighted_text
) {
343 DCHECK(highlighted_text
);
344 highlighted_text
->clear();
346 // Check text matches with all queries.
347 size_t match_start
= 0;
348 size_t match_length
= 0;
350 base::string16 text16
= base::UTF8ToUTF16(text
);
351 std::vector
<bool> highlights(text16
.size(), false);
352 for (auto* query
: queries
) {
353 if (!query
->Search(text16
, &match_start
, &match_length
))
356 std::fill(highlights
.begin() + match_start
,
357 highlights
.begin() + match_start
+ match_length
, true);
360 // Generate highlighted text.
361 size_t start_current_segment
= 0;
363 for (size_t i
= 0; i
< text16
.size(); ++i
) {
364 if (highlights
[start_current_segment
] == highlights
[i
])
367 AppendStringWithHighlight(
368 text16
, start_current_segment
, i
- start_current_segment
,
369 highlights
[start_current_segment
], highlighted_text
);
371 start_current_segment
= i
;
374 DCHECK_GE(text16
.size(), start_current_segment
);
375 AppendStringWithHighlight(
376 text16
, start_current_segment
, text16
.size() - start_current_segment
,
377 highlights
[start_current_segment
], highlighted_text
);
382 } // namespace internal