1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/browser/bookmarks/bookmark_index.h"
11 #include "base/i18n/case_conversion.h"
12 #include "base/strings/string16.h"
13 #include "chrome/browser/bookmarks/bookmark_model.h"
14 #include "chrome/browser/bookmarks/bookmark_title_match.h"
15 #include "chrome/browser/history/history_service.h"
16 #include "chrome/browser/history/history_service_factory.h"
17 #include "chrome/browser/history/query_parser.h"
18 #include "chrome/browser/history/url_database.h"
20 // Used when finding the set of bookmarks that match a query. Each match
21 // represents a set of terms (as an interator into the Index) matching the
22 // query as well as the set of nodes that contain those terms in their titles.
23 struct BookmarkIndex::Match
{
24 // List of terms matching the query.
25 std::list
<Index::const_iterator
> terms
;
27 // The set of nodes matching the terms. As an optimization this is empty
28 // when we match only one term, and is filled in when we get more than one
29 // term. We can do this as when we have only one matching term we know
30 // the set of matching nodes is terms.front()->second.
32 // Use nodes_begin() and nodes_end() to get an iterator over the set as
33 // it handles the necessary switching between nodes and terms.front().
36 // Returns an iterator to the beginning of the matching nodes. See
37 // description of nodes for why this should be used over nodes.begin().
38 NodeSet::const_iterator
nodes_begin() const;
40 // Returns an iterator to the beginning of the matching nodes. See
41 // description of nodes for why this should be used over nodes.end().
42 NodeSet::const_iterator
nodes_end() const;
45 BookmarkIndex::NodeSet::const_iterator
46 BookmarkIndex::Match::nodes_begin() const {
47 return nodes
.empty() ? terms
.front()->second
.begin() : nodes
.begin();
50 BookmarkIndex::NodeSet::const_iterator
BookmarkIndex::Match::nodes_end() const {
51 return nodes
.empty() ? terms
.front()->second
.end() : nodes
.end();
54 BookmarkIndex::BookmarkIndex(content::BrowserContext
* browser_context
)
55 : browser_context_(browser_context
) {
58 BookmarkIndex::~BookmarkIndex() {
61 void BookmarkIndex::Add(const BookmarkNode
* node
) {
64 std::vector
<base::string16
> terms
= ExtractQueryWords(node
->GetTitle());
65 for (size_t i
= 0; i
< terms
.size(); ++i
)
66 RegisterNode(terms
[i
], node
);
69 void BookmarkIndex::Remove(const BookmarkNode
* node
) {
73 std::vector
<base::string16
> terms
= ExtractQueryWords(node
->GetTitle());
74 for (size_t i
= 0; i
< terms
.size(); ++i
)
75 UnregisterNode(terms
[i
], node
);
78 void BookmarkIndex::GetBookmarksWithTitlesMatching(
79 const base::string16
& query
,
81 std::vector
<BookmarkTitleMatch
>* results
) {
82 std::vector
<base::string16
> terms
= ExtractQueryWords(query
);
87 for (size_t i
= 0; i
< terms
.size(); ++i
) {
88 if (!GetBookmarksWithTitleMatchingTerm(terms
[i
], i
== 0, &matches
))
92 NodeTypedCountPairs node_typed_counts
;
93 SortMatches(matches
, &node_typed_counts
);
95 // We use a QueryParser to fill in match positions for us. It's not the most
96 // efficient way to go about this, but by the time we get here we know what
97 // matches and so this shouldn't be performance critical.
99 ScopedVector
<QueryNode
> query_nodes
;
100 parser
.ParseQueryNodes(query
, &query_nodes
.get());
102 // The highest typed counts should be at the beginning of the results vector
103 // so that the best matches will always be included in the results. The loop
104 // that calculates result relevance in HistoryContentsProvider::ConvertResults
105 // will run backwards to assure higher relevance will be attributed to the
107 for (NodeTypedCountPairs::const_iterator i
= node_typed_counts
.begin();
108 i
!= node_typed_counts
.end() && results
->size() < max_count
; ++i
)
109 AddMatchToResults(i
->first
, &parser
, query_nodes
.get(), results
);
112 void BookmarkIndex::SortMatches(const Matches
& matches
,
113 NodeTypedCountPairs
* node_typed_counts
) const {
114 HistoryService
* const history_service
= browser_context_
?
115 HistoryServiceFactory::GetForProfile(
116 Profile::FromBrowserContext(browser_context_
),
117 Profile::EXPLICIT_ACCESS
) : NULL
;
119 history::URLDatabase
* url_db
= history_service
?
120 history_service
->InMemoryDatabase() : NULL
;
122 for (Matches::const_iterator i
= matches
.begin(); i
!= matches
.end(); ++i
)
123 ExtractBookmarkNodePairs(url_db
, *i
, node_typed_counts
);
125 std::sort(node_typed_counts
->begin(), node_typed_counts
->end(),
126 &NodeTypedCountPairSortFunc
);
127 // Eliminate duplicates.
128 node_typed_counts
->erase(std::unique(node_typed_counts
->begin(),
129 node_typed_counts
->end()),
130 node_typed_counts
->end());
133 void BookmarkIndex::ExtractBookmarkNodePairs(
134 history::URLDatabase
* url_db
,
136 NodeTypedCountPairs
* node_typed_counts
) const {
138 for (NodeSet::const_iterator i
= match
.nodes_begin();
139 i
!= match
.nodes_end(); ++i
) {
142 url_db
->GetRowForURL((*i
)->url(), &url
);
143 NodeTypedCountPair
pair(*i
, url
.typed_count());
144 node_typed_counts
->push_back(pair
);
148 void BookmarkIndex::AddMatchToResults(
149 const BookmarkNode
* node
,
151 const std::vector
<QueryNode
*>& query_nodes
,
152 std::vector
<BookmarkTitleMatch
>* results
) {
153 BookmarkTitleMatch title_match
;
154 // Check that the result matches the query. The previous search
155 // was a simple per-word search, while the more complex matching
156 // of QueryParser may filter it out. For example, the query
157 // ["thi"] will match the bookmark titled [Thinking], but since
158 // ["thi"] is quoted we don't want to do a prefix match.
159 if (parser
->DoesQueryMatch(node
->GetTitle(), query_nodes
,
160 &(title_match
.match_positions
))) {
161 title_match
.node
= node
;
162 results
->push_back(title_match
);
166 bool BookmarkIndex::GetBookmarksWithTitleMatchingTerm(const base::string16
& term
,
169 Index::const_iterator i
= index_
.lower_bound(term
);
170 if (i
== index_
.end())
173 if (!QueryParser::IsWordLongEnoughForPrefixSearch(term
)) {
174 // Term is too short for prefix match, compare using exact match.
175 if (i
->first
!= term
)
176 return false; // No bookmarks with this term.
180 match
.terms
.push_back(i
);
181 matches
->push_back(match
);
184 CombineMatchesInPlace(i
, matches
);
185 } else if (first_term
) {
186 // This is the first term and we're doing a prefix match. Loop through
187 // index adding all entries that start with term to matches.
188 while (i
!= index_
.end() &&
189 i
->first
.size() >= term
.size() &&
190 term
.compare(0, term
.size(), i
->first
, 0, term
.size()) == 0) {
192 match
.terms
.push_back(i
);
193 matches
->push_back(match
);
197 // Prefix match and not the first term. Loop through index combining
198 // current matches in matches with term, placing result in result.
200 while (i
!= index_
.end() &&
201 i
->first
.size() >= term
.size() &&
202 term
.compare(0, term
.size(), i
->first
, 0, term
.size()) == 0) {
203 CombineMatches(i
, *matches
, &result
);
206 matches
->swap(result
);
208 return !matches
->empty();
211 void BookmarkIndex::CombineMatchesInPlace(const Index::const_iterator
& index_i
,
213 for (size_t i
= 0; i
< matches
->size(); ) {
214 Match
* match
= &((*matches
)[i
]);
215 NodeSet intersection
;
216 std::set_intersection(match
->nodes_begin(), match
->nodes_end(),
217 index_i
->second
.begin(), index_i
->second
.end(),
218 std::inserter(intersection
, intersection
.begin()));
219 if (intersection
.empty()) {
220 matches
->erase(matches
->begin() + i
);
222 match
->terms
.push_back(index_i
);
223 match
->nodes
.swap(intersection
);
229 void BookmarkIndex::CombineMatches(const Index::const_iterator
& index_i
,
230 const Matches
& current_matches
,
232 for (size_t i
= 0; i
< current_matches
.size(); ++i
) {
233 const Match
& match
= current_matches
[i
];
234 NodeSet intersection
;
235 std::set_intersection(match
.nodes_begin(), match
.nodes_end(),
236 index_i
->second
.begin(), index_i
->second
.end(),
237 std::inserter(intersection
, intersection
.begin()));
238 if (!intersection
.empty()) {
239 result
->push_back(Match());
240 Match
& combined_match
= result
->back();
241 combined_match
.terms
= match
.terms
;
242 combined_match
.terms
.push_back(index_i
);
243 combined_match
.nodes
.swap(intersection
);
248 std::vector
<base::string16
> BookmarkIndex::ExtractQueryWords(
249 const base::string16
& query
) {
250 std::vector
<base::string16
> terms
;
252 return std::vector
<base::string16
>();
254 // TODO(brettw): use ICU normalization:
255 // http://userguide.icu-project.org/transforms/normalization
256 parser
.ParseQueryWords(base::i18n::ToLower(query
), &terms
);
260 void BookmarkIndex::RegisterNode(const base::string16
& term
,
261 const BookmarkNode
* node
) {
262 index_
[term
].insert(node
);
265 void BookmarkIndex::UnregisterNode(const base::string16
& term
,
266 const BookmarkNode
* node
) {
267 Index::iterator i
= index_
.find(term
);
268 if (i
== index_
.end()) {
269 // We can get here if the node has the same term more than once. For
270 // example, a bookmark with the title 'foo foo' would end up here.
273 i
->second
.erase(node
);
274 if (i
->second
.empty())