Initial import into git.
[galago.git] / cpp / galago / include / DocumentOrderedBinnedIterator.hpp
blob46e94ee598b870590027e8383cda578af2c6d720
2 //
3 // DocumentOrderedBinnedIterator
4 //
5 // October 15, 2007 -- tds
6 //
8 #ifndef GALAGO_DOCUMENTORDEREDBINNEDITERATOR_HPP
9 #define GALAGO_DOCUMENTORDEREDBINNEDITERATOR_HPP
11 #include "lemur/RVLCompress.hpp"
12 #include <assert.h>
13 #include <iostream>
15 class DocumentOrderedBinnedIterator {
16 public:
17 struct length_less {
18 bool operator() ( DocumentOrderedBinnedIterator* one, DocumentOrderedBinnedIterator* two ) {
19 return one->listByteLength() < two->listByteLength();
23 struct bound_greater {
24 bool operator() ( DocumentOrderedBinnedIterator* one, DocumentOrderedBinnedIterator* two ) {
25 return one->listBound() > two->listBound();
29 private:
30 const char* _data;
31 UINT64 _start;
32 UINT64 _end;
34 struct Skip {
35 public:
36 int document;
37 int bound;
38 UINT64 offset;
41 Skip _lastSkip;
42 Skip _currentSkip;
44 int _options;
45 int _documentCount;
46 int _listBound;
47 bool _hasSkips;
49 UINT64 _skipLength;
50 UINT64 _dataLength;
52 const char* _skips;
53 const char* _skipsStart;
54 const char* _skipsEnd;
56 const char* _postings;
57 const char* _postingsStart;
58 const char* _postingsEnd;
60 int _document;
61 int _score;
62 bool _done;
65 private:
66 /**
67 * Reads the next skip datum.
70 void readSkip() {
71 if( _skips == _skipsEnd ) {
72 _lastSkip = _currentSkip;
73 _currentSkip.document = MAX_INT32;
74 _currentSkip.bound = _listBound;
75 _currentSkip.offset = _dataLength;
76 } else {
77 _lastSkip = _currentSkip;
79 _skips = lemur::utility::RVLCompress::decompress_int( _skips, _currentSkip.bound );
80 _skips = lemur::utility::RVLCompress::decompress_int( _skips, _currentSkip.document );
81 _skips = lemur::utility::RVLCompress::decompress_longlong( _skips, _currentSkip.offset );
83 _currentSkip.document += _lastSkip.document;
84 _currentSkip.offset += _lastSkip.offset;
86 assert( _currentSkip.offset <= _dataLength );
90 void moveToSkip() {
91 if( _lastSkip.document == MAX_INT32 ) {
92 _done = true;
93 return;
96 if( _lastSkip.offset > _postings - _postingsStart )
97 _postings = _postingsStart + _lastSkip.offset;
99 assert( _postings <= _postingsEnd );
100 _document = _lastSkip.document;
101 _score = 0;
104 void readDocument() {
105 assert( _postings <= _postingsEnd );
107 if( _postings == _postingsEnd ) {
108 _done = true;
109 } else {
110 int delta;
112 _postings = lemur::utility::RVLCompress::decompress_int( _postings, delta );
113 _postings = lemur::utility::RVLCompress::decompress_int( _postings, _score );
114 _document += delta;
115 assert( _document <= _lastSkip.document || _document > _currentSkip.document || _score <= _currentSkip.bound );
119 public:
120 DocumentOrderedBinnedIterator( const char* data, UINT64 start, UINT64 end ) :
121 _data(data), _start(start), _end(end)
123 load();
126 void load() {
127 const char* header = _data + _start;
129 header = lemur::utility::RVLCompress::decompress_int( header, _options );
130 header = lemur::utility::RVLCompress::decompress_int( header, _documentCount );
131 header = lemur::utility::RVLCompress::decompress_int( header, _listBound );
133 if( (_options & 1) > 0 ) {
134 header = lemur::utility::RVLCompress::decompress_longlong( header, _skipLength );
135 _hasSkips = true;
136 } else {
137 _skipLength = 0;
138 _hasSkips = false;
141 header = lemur::utility::RVLCompress::decompress_longlong( header, _dataLength );
143 _lastSkip.bound = 0;
144 _lastSkip.document = 0;
145 _lastSkip.offset = 0;
146 _currentSkip = _lastSkip;
148 _skips = header;
149 _skipsStart = header;
150 _skipsEnd = _skips + _skipLength;
152 _postings = _skipsEnd;
153 _postingsStart = _postings;
154 _postingsEnd = _postings + _dataLength;
155 _done = false;
156 _document = 0;
157 _score = 0;
159 readSkip();
160 readDocument();
163 UINT64 listByteLength() {
164 return _end - _start;
167 int listBound() {
168 return _listBound;
171 int currentScore() {
172 return _score;
175 int currentDocument() {
176 return _document;
179 int currentBoundDocument() {
180 return _currentSkip.document;
183 int lastBoundDocument() {
184 return _lastSkip.document;
187 int currentBound() {
188 return _currentSkip.bound;
191 void readSkipsTo( int document ) {
192 while( _currentSkip.document < document && _skips != _skipsEnd )
193 readSkip();
196 bool skipToBound( int bound ) {
197 while( _skips != _skipsEnd && _currentSkip.bound < bound )
198 readSkip();
200 if( _document < _lastSkip.document )
201 moveToSkip();
203 while( !_done && _score < bound )
204 readDocument();
206 return !_done && _score < bound;
209 bool skipToDocument( int skipTo ) {
210 while( skipTo > _currentSkip.document )
211 readSkip();
213 if( _document < _lastSkip.document && skipTo > _lastSkip.document )
214 moveToSkip();
216 while( !_done && skipTo > _document )
217 readDocument();
219 return !_done && _document == skipTo;
222 bool skipToDocument( int skipTo, int bound ) {
223 while( skipTo > _currentSkip.document )
224 readSkip();
226 if( _document < _lastSkip.document && skipTo > _lastSkip.document )
227 moveToSkip();
229 if( _currentSkip.bound < bound )
230 return false;
232 while( !_done && skipTo > _document )
233 readDocument();
235 return !_done && _document == skipTo && _score >= bound;
238 void nextDocument() {
239 readDocument();
242 bool isDone() {
243 return _done;
248 #endif // GALAGO_DOCUMENTORDEREDBINNEDITERATOR_HPP