1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/chromeos/drive/search_metadata.h"
10 #include "base/bind.h"
11 #include "base/i18n/string_search.h"
12 #include "base/metrics/histogram.h"
13 #include "base/strings/utf_string_conversions.h"
14 #include "base/time/time.h"
15 #include "chrome/browser/chromeos/drive/file_system_util.h"
16 #include "content/public/browser/browser_thread.h"
17 #include "google_apis/drive/gdata_wapi_parser.h"
18 #include "net/base/escape.h"
20 using content::BrowserThread
;
27 struct ResultCandidate
{
28 ResultCandidate(const std::string
& local_id
,
29 const ResourceEntry
& entry
,
30 const std::string
& highlighted_base_name
)
33 highlighted_base_name(highlighted_base_name
) {
38 std::string highlighted_base_name
;
41 // Used to sort the result candidates per the last accessed/modified time. The
42 // recently accessed/modified files come first.
43 bool CompareByTimestamp(const ResourceEntry
& a
, const ResourceEntry
& b
) {
44 const PlatformFileInfoProto
& a_file_info
= a
.file_info();
45 const PlatformFileInfoProto
& b_file_info
= b
.file_info();
47 if (a_file_info
.last_accessed() != b_file_info
.last_accessed())
48 return a_file_info
.last_accessed() > b_file_info
.last_accessed();
50 // When the entries have the same last access time (which happens quite often
51 // because Drive server doesn't set the field until an entry is viewed via
52 // drive.google.com), we use last modified time as the tie breaker.
53 return a_file_info
.last_modified() > b_file_info
.last_modified();
56 struct ResultCandidateComparator
{
57 bool operator()(const ResultCandidate
* a
, const ResultCandidate
* b
) const {
58 return CompareByTimestamp(a
->entry
, b
->entry
);
62 // A wrapper of std::priority_queue which deals with pointers of values.
63 template<typename T
, typename Compare
>
64 class ScopedPriorityQueue
{
66 ScopedPriorityQueue() {}
68 ~ScopedPriorityQueue() {
73 bool empty() const { return queue_
.empty(); }
75 size_t size() const { return queue_
.size(); }
77 const T
* top() const { return queue_
.top(); }
79 void push(T
* x
) { queue_
.push(x
); }
82 // Keep top alive for the pop() call so that debug checks can access
83 // underlying data (e.g. validating heap property of the priority queue
84 // will call the comparator).
85 T
* saved_top
= queue_
.top();
91 std::priority_queue
<T
*, std::vector
<T
*>, Compare
> queue_
;
93 DISALLOW_COPY_AND_ASSIGN(ScopedPriorityQueue
);
96 // Classifies the given entry as trashed if it's placed under the trash.
97 class TrashedEntryClassifier
{
99 explicit TrashedEntryClassifier(ResourceMetadata
* metadata
)
100 : metadata_(metadata
) {
101 trashed_
[""] = false;
102 trashed_
[util::kDriveTrashDirLocalId
] = true;
105 // |result| is set to true if |entry| is under the trash.
106 FileError
IsTrashed(const ResourceEntry
& entry
, bool* result
) {
107 // parent_local_id cannot be used to classify the trash itself.
108 if (entry
.local_id() == util::kDriveTrashDirLocalId
) {
110 return FILE_ERROR_OK
;
113 // Look up for parents recursively.
114 std::vector
<std::string
> undetermined_ids
;
115 undetermined_ids
.push_back(entry
.parent_local_id());
117 std::map
<std::string
, bool>::iterator it
=
118 trashed_
.find(undetermined_ids
.back());
119 for (; it
== trashed_
.end(); it
= trashed_
.find(undetermined_ids
.back())) {
120 ResourceEntry parent
;
122 metadata_
->GetResourceEntryById(undetermined_ids
.back(), &parent
);
123 if (error
!= FILE_ERROR_OK
)
125 undetermined_ids
.push_back(parent
.parent_local_id());
128 // Cache the result to |trashed_|.
129 undetermined_ids
.pop_back(); // The last one is already in |trashed_|.
130 for (size_t i
= 0; i
< undetermined_ids
.size(); ++i
)
131 trashed_
[undetermined_ids
[i
]] = it
->second
;
133 *result
= it
->second
;
134 return FILE_ERROR_OK
;
138 ResourceMetadata
* metadata_
;
139 std::map
<std::string
, bool> trashed_
; // local ID to is_trashed map.
142 // Returns true if |entry| is eligible for the search |options| and should be
143 // tested for the match with the query. If
144 // SEARCH_METADATA_EXCLUDE_HOSTED_DOCUMENTS is requested, the hosted documents
145 // are skipped. If SEARCH_METADATA_EXCLUDE_DIRECTORIES is requested, the
146 // directories are skipped. If SEARCH_METADATA_SHARED_WITH_ME is requested, only
147 // the entries with shared-with-me label will be tested. If
148 // SEARCH_METADATA_OFFLINE is requested, only hosted documents and cached files
149 // match with the query. This option can not be used with other options.
150 bool IsEligibleEntry(const ResourceEntry
& entry
,
151 ResourceMetadata::Iterator
* it
,
153 if ((options
& SEARCH_METADATA_EXCLUDE_HOSTED_DOCUMENTS
) &&
154 entry
.file_specific_info().is_hosted_document())
157 if ((options
& SEARCH_METADATA_EXCLUDE_DIRECTORIES
) &&
158 entry
.file_info().is_directory())
161 if (options
& SEARCH_METADATA_SHARED_WITH_ME
)
162 return entry
.shared_with_me();
164 if (options
& SEARCH_METADATA_OFFLINE
) {
165 if (entry
.file_specific_info().is_hosted_document()) {
166 // Not all hosted documents are cached by Drive offline app.
167 // http://support.google.com/drive/bin/answer.py?hl=en&answer=1628467
168 switch (google_apis::ResourceEntry::GetEntryKindFromExtension(
169 entry
.file_specific_info().document_extension())) {
170 case google_apis::ENTRY_KIND_DOCUMENT
:
171 case google_apis::ENTRY_KIND_SPREADSHEET
:
172 case google_apis::ENTRY_KIND_PRESENTATION
:
173 case google_apis::ENTRY_KIND_DRAWING
:
179 FileCacheEntry cache_entry
;
180 it
->GetCacheEntry(&cache_entry
);
181 return cache_entry
.is_present();
185 // Exclude "drive", "drive/root", and "drive/other".
186 if (it
->GetID() == util::kDriveGrandRootLocalId
||
187 entry
.parent_local_id() == util::kDriveGrandRootLocalId
) {
194 // Used to implement SearchMetadata.
195 // Adds entry to the result when appropriate.
196 // In particular, if |query| is non-null, only adds files with the name matching
198 FileError
MaybeAddEntryToResult(
199 ResourceMetadata
* resource_metadata
,
200 ResourceMetadata::Iterator
* it
,
201 base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents
* query
,
203 size_t at_most_num_matches
,
204 TrashedEntryClassifier
* trashed_entry_classifier
,
205 ScopedPriorityQueue
<ResultCandidate
,
206 ResultCandidateComparator
>* result_candidates
) {
207 DCHECK_GE(at_most_num_matches
, result_candidates
->size());
209 const ResourceEntry
& entry
= it
->GetValue();
211 // If the candidate set is already full, and this |entry| is old, do nothing.
212 // We perform this check first in order to avoid the costly find-and-highlight
213 // or FilePath lookup as much as possible.
214 if (result_candidates
->size() == at_most_num_matches
&&
215 !CompareByTimestamp(entry
, result_candidates
->top()->entry
))
216 return FILE_ERROR_OK
;
218 // Add |entry| to the result if the entry is eligible for the given
219 // |options| and matches the query. The base name of the entry must
220 // contain |query| to match the query.
221 std::string highlighted
;
222 if (!IsEligibleEntry(entry
, it
, options
) ||
223 (query
&& !FindAndHighlight(entry
.base_name(), query
, &highlighted
)))
224 return FILE_ERROR_OK
;
226 // Trashed entry should not be returned.
227 bool trashed
= false;
228 FileError error
= trashed_entry_classifier
->IsTrashed(entry
, &trashed
);
229 if (error
!= FILE_ERROR_OK
|| trashed
)
232 // Make space for |entry| when appropriate.
233 if (result_candidates
->size() == at_most_num_matches
)
234 result_candidates
->pop();
235 result_candidates
->push(new ResultCandidate(it
->GetID(), entry
, highlighted
));
236 return FILE_ERROR_OK
;
239 // Implements SearchMetadata().
240 FileError
SearchMetadataOnBlockingPool(ResourceMetadata
* resource_metadata
,
241 const std::string
& query_text
,
243 int at_most_num_matches
,
244 MetadataSearchResultVector
* results
) {
245 ScopedPriorityQueue
<ResultCandidate
,
246 ResultCandidateComparator
> result_candidates
;
248 // Prepare data structure for searching.
249 base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents
query(
250 base::UTF8ToUTF16(query_text
));
252 // Iterate over entries.
253 TrashedEntryClassifier
trashed_entry_classifier(resource_metadata
);
254 scoped_ptr
<ResourceMetadata::Iterator
> it
= resource_metadata
->GetIterator();
255 for (; !it
->IsAtEnd(); it
->Advance()) {
256 FileError error
= MaybeAddEntryToResult(resource_metadata
, it
.get(),
257 query_text
.empty() ? NULL
: &query
,
260 &trashed_entry_classifier
,
262 if (error
!= FILE_ERROR_OK
)
266 // Prepare the result.
267 for (; !result_candidates
.empty(); result_candidates
.pop()) {
268 const ResultCandidate
& candidate
= *result_candidates
.top();
269 // The path field of entries in result_candidates are empty at this point,
270 // because we don't want to run the expensive metadata DB look up except for
271 // the final results. Hence, here we fill the part.
272 base::FilePath path
= resource_metadata
->GetFilePath(candidate
.local_id
);
274 return FILE_ERROR_FAILED
;
275 results
->push_back(MetadataSearchResult(
276 path
, candidate
.entry
, candidate
.highlighted_base_name
));
279 // Reverse the order here because |result_candidates| puts the most
280 // uninteresting candidate at the top.
281 std::reverse(results
->begin(), results
->end());
283 return FILE_ERROR_OK
;
286 // Runs the SearchMetadataCallback and updates the histogram.
287 void RunSearchMetadataCallback(const SearchMetadataCallback
& callback
,
288 const base::TimeTicks
& start_time
,
289 scoped_ptr
<MetadataSearchResultVector
> results
,
291 if (error
!= FILE_ERROR_OK
)
293 callback
.Run(error
, results
.Pass());
295 UMA_HISTOGRAM_TIMES("Drive.SearchMetadataTime",
296 base::TimeTicks::Now() - start_time
);
302 scoped_refptr
<base::SequencedTaskRunner
> blocking_task_runner
,
303 ResourceMetadata
* resource_metadata
,
304 const std::string
& query
,
306 int at_most_num_matches
,
307 const SearchMetadataCallback
& callback
) {
308 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI
));
309 DCHECK_LE(0, at_most_num_matches
);
310 DCHECK(!callback
.is_null());
312 const base::TimeTicks start_time
= base::TimeTicks::Now();
314 scoped_ptr
<MetadataSearchResultVector
> results(
315 new MetadataSearchResultVector
);
316 MetadataSearchResultVector
* results_ptr
= results
.get();
317 base::PostTaskAndReplyWithResult(blocking_task_runner
.get(),
319 base::Bind(&SearchMetadataOnBlockingPool
,
325 base::Bind(&RunSearchMetadataCallback
,
328 base::Passed(&results
)));
331 bool FindAndHighlight(
332 const std::string
& text
,
333 base::i18n::FixedPatternStringSearchIgnoringCaseAndAccents
* query
,
334 std::string
* highlighted_text
) {
336 DCHECK(highlighted_text
);
337 highlighted_text
->clear();
339 base::string16 text16
= base::UTF8ToUTF16(text
);
340 size_t match_start
= 0;
341 size_t match_length
= 0;
342 if (!query
->Search(text16
, &match_start
, &match_length
))
345 base::string16 pre
= text16
.substr(0, match_start
);
346 base::string16 match
= text16
.substr(match_start
, match_length
);
347 base::string16 post
= text16
.substr(match_start
+ match_length
);
348 highlighted_text
->append(net::EscapeForHTML(base::UTF16ToUTF8(pre
)));
349 highlighted_text
->append("<b>");
350 highlighted_text
->append(net::EscapeForHTML(base::UTF16ToUTF8(match
)));
351 highlighted_text
->append("</b>");
352 highlighted_text
->append(net::EscapeForHTML(base::UTF16ToUTF8(post
)));
356 } // namespace internal