3 // DocumentOrderedBinnedIterator
5 // October 15, 2007 -- tds
8 #ifndef GALAGO_DOCUMENTORDEREDBINNEDITERATOR_HPP
9 #define GALAGO_DOCUMENTORDEREDBINNEDITERATOR_HPP
11 #include "lemur/RVLCompress.hpp"
15 class DocumentOrderedBinnedIterator
{
18 bool operator() ( DocumentOrderedBinnedIterator
* one
, DocumentOrderedBinnedIterator
* two
) {
19 return one
->listByteLength() < two
->listByteLength();
23 struct bound_greater
{
24 bool operator() ( DocumentOrderedBinnedIterator
* one
, DocumentOrderedBinnedIterator
* two
) {
25 return one
->listBound() > two
->listBound();
53 const char* _skipsStart
;
54 const char* _skipsEnd
;
56 const char* _postings
;
57 const char* _postingsStart
;
58 const char* _postingsEnd
;
67 * Reads the next skip datum.
71 if( _skips
== _skipsEnd
) {
72 _lastSkip
= _currentSkip
;
73 _currentSkip
.document
= MAX_INT32
;
74 _currentSkip
.bound
= _listBound
;
75 _currentSkip
.offset
= _dataLength
;
77 _lastSkip
= _currentSkip
;
79 _skips
= lemur::utility::RVLCompress::decompress_int( _skips
, _currentSkip
.bound
);
80 _skips
= lemur::utility::RVLCompress::decompress_int( _skips
, _currentSkip
.document
);
81 _skips
= lemur::utility::RVLCompress::decompress_longlong( _skips
, _currentSkip
.offset
);
83 _currentSkip
.document
+= _lastSkip
.document
;
84 _currentSkip
.offset
+= _lastSkip
.offset
;
86 assert( _currentSkip
.offset
<= _dataLength
);
91 if( _lastSkip
.document
== MAX_INT32
) {
96 if( _lastSkip
.offset
> _postings
- _postingsStart
)
97 _postings
= _postingsStart
+ _lastSkip
.offset
;
99 assert( _postings
<= _postingsEnd
);
100 _document
= _lastSkip
.document
;
104 void readDocument() {
105 assert( _postings
<= _postingsEnd
);
107 if( _postings
== _postingsEnd
) {
112 _postings
= lemur::utility::RVLCompress::decompress_int( _postings
, delta
);
113 _postings
= lemur::utility::RVLCompress::decompress_int( _postings
, _score
);
115 assert( _document
<= _lastSkip
.document
|| _document
> _currentSkip
.document
|| _score
<= _currentSkip
.bound
);
120 DocumentOrderedBinnedIterator( const char* data
, UINT64 start
, UINT64 end
) :
121 _data(data
), _start(start
), _end(end
)
127 const char* header
= _data
+ _start
;
129 header
= lemur::utility::RVLCompress::decompress_int( header
, _options
);
130 header
= lemur::utility::RVLCompress::decompress_int( header
, _documentCount
);
131 header
= lemur::utility::RVLCompress::decompress_int( header
, _listBound
);
133 if( (_options
& 1) > 0 ) {
134 header
= lemur::utility::RVLCompress::decompress_longlong( header
, _skipLength
);
141 header
= lemur::utility::RVLCompress::decompress_longlong( header
, _dataLength
);
144 _lastSkip
.document
= 0;
145 _lastSkip
.offset
= 0;
146 _currentSkip
= _lastSkip
;
149 _skipsStart
= header
;
150 _skipsEnd
= _skips
+ _skipLength
;
152 _postings
= _skipsEnd
;
153 _postingsStart
= _postings
;
154 _postingsEnd
= _postings
+ _dataLength
;
163 UINT64
listByteLength() {
164 return _end
- _start
;
175 int currentDocument() {
179 int currentBoundDocument() {
180 return _currentSkip
.document
;
183 int lastBoundDocument() {
184 return _lastSkip
.document
;
188 return _currentSkip
.bound
;
191 void readSkipsTo( int document
) {
192 while( _currentSkip
.document
< document
&& _skips
!= _skipsEnd
)
196 bool skipToBound( int bound
) {
197 while( _skips
!= _skipsEnd
&& _currentSkip
.bound
< bound
)
200 if( _document
< _lastSkip
.document
)
203 while( !_done
&& _score
< bound
)
206 return !_done
&& _score
< bound
;
209 bool skipToDocument( int skipTo
) {
210 while( skipTo
> _currentSkip
.document
)
213 if( _document
< _lastSkip
.document
&& skipTo
> _lastSkip
.document
)
216 while( !_done
&& skipTo
> _document
)
219 return !_done
&& _document
== skipTo
;
222 bool skipToDocument( int skipTo
, int bound
) {
223 while( skipTo
> _currentSkip
.document
)
226 if( _document
< _lastSkip
.document
&& skipTo
> _lastSkip
.document
)
229 if( _currentSkip
.bound
< bound
)
232 while( !_done
&& skipTo
> _document
)
235 return !_done
&& _document
== skipTo
&& _score
>= bound
;
238 void nextDocument() {
248 #endif // GALAGO_DOCUMENTORDEREDBINNEDITERATOR_HPP