1 //===--- Iterator.cpp - Query Symbol Retrieval ------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 #include "llvm/ADT/STLExtras.h"
20 /// Implements Iterator over the intersection of other iterators.
22 /// AndIterator iterates through common items among all children. It becomes
23 /// exhausted as soon as any child becomes exhausted. After each mutation, the
24 /// iterator restores the invariant: all children must point to the same item.
25 class AndIterator
: public Iterator
{
27 explicit AndIterator(std::vector
<std::unique_ptr
<Iterator
>> AllChildren
)
28 : Iterator(Kind::And
), Children(std::move(AllChildren
)) {
29 assert(!Children
.empty() && "AND iterator should have at least one child.");
30 // Establish invariants.
31 for (const auto &Child
: Children
)
32 ReachedEnd
|= Child
->reachedEnd();
34 // When children are sorted by the estimateSize(), sync() calls are more
35 // effective. Each sync() starts with the first child and makes sure all
36 // children point to the same element. If any child is "above" the previous
37 // ones, the algorithm resets and advances the children to the next
38 // highest element starting from the front. When child iterators in the
39 // beginning have smaller estimated size, the sync() will have less restarts
40 // and become more effective.
41 llvm::sort(Children
, [](const std::unique_ptr
<Iterator
> &LHS
,
42 const std::unique_ptr
<Iterator
> &RHS
) {
43 return LHS
->estimateSize() < RHS
->estimateSize();
47 bool reachedEnd() const override
{ return ReachedEnd
; }
49 /// Advances all children to the next common item.
50 void advance() override
{
51 assert(!reachedEnd() && "AND iterator can't advance() at the end.");
52 Children
.front()->advance();
56 /// Advances all children to the next common item with DocumentID >= ID.
57 void advanceTo(DocID ID
) override
{
58 assert(!reachedEnd() && "AND iterator can't advanceTo() at the end.");
59 Children
.front()->advanceTo(ID
);
63 DocID
peek() const override
{ return Children
.front()->peek(); }
65 float consume() override
{
66 assert(!reachedEnd() && "AND iterator can't consume() at the end.");
68 for (const auto &Child
: Children
)
69 Boost
*= Child
->consume();
73 size_t estimateSize() const override
{
74 return Children
.front()->estimateSize();
78 llvm::raw_ostream
&dump(llvm::raw_ostream
&OS
) const override
{
81 for (const auto &Child
: Children
) {
82 OS
<< Separator
<< *Child
;
89 /// Restores class invariants: each child will point to the same element after
92 ReachedEnd
|= Children
.front()->reachedEnd();
95 auto SyncID
= Children
.front()->peek();
96 // Indicates whether any child needs to be advanced to new SyncID.
97 bool NeedsAdvance
= false;
100 for (auto &Child
: Children
) {
101 Child
->advanceTo(SyncID
);
102 ReachedEnd
|= Child
->reachedEnd();
103 // If any child reaches end And iterator can not match any other items.
104 // In this case, just terminate the process.
107 // Cache the result so that peek() is not called again as it may be
108 // quite expensive in AND with large subtrees.
109 auto Candidate
= Child
->peek();
110 // If any child goes beyond given ID (i.e. ID is not the common item),
111 // all children should be advanced to the next common item.
112 if (Candidate
> SyncID
) {
115 // Reset and try to sync again. Sync starts with the first child as
116 // this is the cheapest (smallest size estimate). This way advanceTo
117 // is called on the large posting lists once the sync point is very
122 } while (NeedsAdvance
);
125 /// AndIterator owns its children and ensures that all of them point to the
126 /// same element. As soon as one child gets exhausted, AndIterator can no
127 /// longer advance and has reached its end.
128 std::vector
<std::unique_ptr
<Iterator
>> Children
;
129 /// Indicates whether any child is exhausted. It is cheaper to maintain and
130 /// update the field, rather than traversing the whole subtree in each
131 /// reachedEnd() call.
132 bool ReachedEnd
= false;
133 friend Corpus
; // For optimizations.
136 /// Implements Iterator over the union of other iterators.
138 /// OrIterator iterates through all items which can be pointed to by at least
139 /// one child. To preserve the sorted order, this iterator always advances the
140 /// child with smallest Child->peek() value. OrIterator becomes exhausted as
141 /// soon as all of its children are exhausted.
142 class OrIterator
: public Iterator
{
144 explicit OrIterator(std::vector
<std::unique_ptr
<Iterator
>> AllChildren
)
145 : Iterator(Kind::Or
), Children(std::move(AllChildren
)) {
146 assert(!Children
.empty() && "OR iterator should have at least one child.");
149 /// Returns true if all children are exhausted.
150 bool reachedEnd() const override
{
151 for (const auto &Child
: Children
)
152 if (!Child
->reachedEnd())
157 /// Moves each child pointing to the smallest DocID to the next item.
158 void advance() override
{
159 assert(!reachedEnd() && "OR iterator can't advance() at the end.");
160 const auto SmallestID
= peek();
161 for (const auto &Child
: Children
)
162 if (!Child
->reachedEnd() && Child
->peek() == SmallestID
)
166 /// Advances each child to the next existing element with DocumentID >= ID.
167 void advanceTo(DocID ID
) override
{
168 assert(!reachedEnd() && "OR iterator can't advanceTo() at the end.");
169 for (const auto &Child
: Children
)
170 if (!Child
->reachedEnd())
171 Child
->advanceTo(ID
);
174 /// Returns the element under cursor of the child with smallest Child->peek()
176 DocID
peek() const override
{
177 assert(!reachedEnd() && "OR iterator can't peek() at the end.");
178 DocID Result
= std::numeric_limits
<DocID
>::max();
180 for (const auto &Child
: Children
)
181 if (!Child
->reachedEnd())
182 Result
= std::min(Result
, Child
->peek());
187 // Returns the maximum boosting score among all Children when iterator
188 // points to the current ID.
189 float consume() override
{
190 assert(!reachedEnd() && "OR iterator can't consume() at the end.");
191 const DocID ID
= peek();
193 for (const auto &Child
: Children
)
194 if (!Child
->reachedEnd() && Child
->peek() == ID
)
195 Boost
= std::max(Boost
, Child
->consume());
199 size_t estimateSize() const override
{
201 for (const auto &Child
: Children
)
202 Size
= std::max(Size
, Child
->estimateSize());
207 llvm::raw_ostream
&dump(llvm::raw_ostream
&OS
) const override
{
209 auto *Separator
= "";
210 for (const auto &Child
: Children
) {
211 OS
<< Separator
<< *Child
;
218 std::vector
<std::unique_ptr
<Iterator
>> Children
;
219 friend Corpus
; // For optimizations.
222 /// TrueIterator handles PostingLists which contain all items of the index. It
223 /// stores size of the virtual posting list, and all operations are performed
225 class TrueIterator
: public Iterator
{
227 explicit TrueIterator(DocID Size
) : Iterator(Kind::True
), Size(Size
) {}
229 bool reachedEnd() const override
{ return Index
>= Size
; }
231 void advance() override
{
232 assert(!reachedEnd() && "TRUE iterator can't advance() at the end.");
236 void advanceTo(DocID ID
) override
{
237 assert(!reachedEnd() && "TRUE iterator can't advanceTo() at the end.");
238 Index
= std::min(ID
, Size
);
241 DocID
peek() const override
{
242 assert(!reachedEnd() && "TRUE iterator can't peek() at the end.");
246 float consume() override
{
247 assert(!reachedEnd() && "TRUE iterator can't consume() at the end.");
251 size_t estimateSize() const override
{ return Size
; }
254 llvm::raw_ostream
&dump(llvm::raw_ostream
&OS
) const override
{
259 /// Size of the underlying virtual PostingList.
263 /// FalseIterator yields no results.
264 class FalseIterator
: public Iterator
{
266 FalseIterator() : Iterator(Kind::False
) {}
267 bool reachedEnd() const override
{ return true; }
268 void advance() override
{ assert(false); }
269 void advanceTo(DocID ID
) override
{ assert(false); }
270 DocID
peek() const override
{
274 float consume() override
{
278 size_t estimateSize() const override
{ return 0; }
281 llvm::raw_ostream
&dump(llvm::raw_ostream
&OS
) const override
{
282 return OS
<< "false";
286 /// Boost iterator is a wrapper around its child which multiplies scores of
287 /// each retrieved item by a given factor.
288 class BoostIterator
: public Iterator
{
290 BoostIterator(std::unique_ptr
<Iterator
> Child
, float Factor
)
291 : Child(std::move(Child
)), Factor(Factor
) {}
293 bool reachedEnd() const override
{ return Child
->reachedEnd(); }
295 void advance() override
{ Child
->advance(); }
297 void advanceTo(DocID ID
) override
{ Child
->advanceTo(ID
); }
299 DocID
peek() const override
{ return Child
->peek(); }
301 float consume() override
{ return Child
->consume() * Factor
; }
303 size_t estimateSize() const override
{ return Child
->estimateSize(); }
306 llvm::raw_ostream
&dump(llvm::raw_ostream
&OS
) const override
{
307 return OS
<< "(* " << Factor
<< ' ' << *Child
<< ')';
310 std::unique_ptr
<Iterator
> Child
;
314 /// This iterator limits the number of items retrieved from the child iterator
315 /// on top of the query tree. To ensure that query tree with LIMIT iterators
316 /// inside works correctly, users have to call Root->consume(Root->peek()) each
317 /// time item is retrieved at the root of query tree.
318 class LimitIterator
: public Iterator
{
320 LimitIterator(std::unique_ptr
<Iterator
> Child
, size_t Limit
)
321 : Child(std::move(Child
)), Limit(Limit
), ItemsLeft(Limit
) {}
323 bool reachedEnd() const override
{
324 return ItemsLeft
== 0 || Child
->reachedEnd();
327 void advance() override
{ Child
->advance(); }
329 void advanceTo(DocID ID
) override
{ Child
->advanceTo(ID
); }
331 DocID
peek() const override
{ return Child
->peek(); }
333 /// Decreases the limit in case the element consumed at top of the query tree
334 /// comes from the underlying iterator.
335 float consume() override
{
336 assert(!reachedEnd() && "LimitIterator can't consume() at the end.");
338 return Child
->consume();
341 size_t estimateSize() const override
{
342 return std::min(Child
->estimateSize(), Limit
);
346 llvm::raw_ostream
&dump(llvm::raw_ostream
&OS
) const override
{
347 return OS
<< "(LIMIT " << Limit
<< " " << *Child
<< ')';
350 std::unique_ptr
<Iterator
> Child
;
357 std::vector
<std::pair
<DocID
, float>> consume(Iterator
&It
) {
358 std::vector
<std::pair
<DocID
, float>> Result
;
359 for (; !It
.reachedEnd(); It
.advance())
360 Result
.emplace_back(It
.peek(), It
.consume());
364 std::unique_ptr
<Iterator
>
365 Corpus::intersect(std::vector
<std::unique_ptr
<Iterator
>> Children
) const {
366 std::vector
<std::unique_ptr
<Iterator
>> RealChildren
;
367 for (auto &Child
: Children
) {
368 switch (Child
->kind()) {
369 case Iterator::Kind::True
:
370 break; // No effect, drop the iterator.
371 case Iterator::Kind::False
:
372 return std::move(Child
); // Intersection is empty.
373 case Iterator::Kind::And
: {
374 // Inline nested AND into parent AND.
375 auto &NewChildren
= static_cast<AndIterator
*>(Child
.get())->Children
;
376 std::move(NewChildren
.begin(), NewChildren
.end(),
377 std::back_inserter(RealChildren
));
381 RealChildren
.push_back(std::move(Child
));
384 switch (RealChildren
.size()) {
388 return std::move(RealChildren
.front());
390 return std::make_unique
<AndIterator
>(std::move(RealChildren
));
394 std::unique_ptr
<Iterator
>
395 Corpus::unionOf(std::vector
<std::unique_ptr
<Iterator
>> Children
) const {
396 std::vector
<std::unique_ptr
<Iterator
>> RealChildren
;
397 for (auto &Child
: Children
) {
398 switch (Child
->kind()) {
399 case Iterator::Kind::False
:
400 break; // No effect, drop the iterator.
401 case Iterator::Kind::Or
: {
402 // Inline nested OR into parent OR.
403 auto &NewChildren
= static_cast<OrIterator
*>(Child
.get())->Children
;
404 std::move(NewChildren
.begin(), NewChildren
.end(),
405 std::back_inserter(RealChildren
));
408 case Iterator::Kind::True
:
409 // Don't return all(), which would discard sibling boosts.
411 RealChildren
.push_back(std::move(Child
));
414 switch (RealChildren
.size()) {
418 return std::move(RealChildren
.front());
420 return std::make_unique
<OrIterator
>(std::move(RealChildren
));
424 std::unique_ptr
<Iterator
> Corpus::all() const {
425 return std::make_unique
<TrueIterator
>(Size
);
428 std::unique_ptr
<Iterator
> Corpus::none() const {
429 return std::make_unique
<FalseIterator
>();
432 std::unique_ptr
<Iterator
> Corpus::boost(std::unique_ptr
<Iterator
> Child
,
433 float Factor
) const {
436 if (Child
->kind() == Iterator::Kind::False
)
438 return std::make_unique
<BoostIterator
>(std::move(Child
), Factor
);
441 std::unique_ptr
<Iterator
> Corpus::limit(std::unique_ptr
<Iterator
> Child
,
442 size_t Limit
) const {
443 if (Child
->kind() == Iterator::Kind::False
)
445 return std::make_unique
<LimitIterator
>(std::move(Child
), Limit
);
449 } // namespace clangd