// BinOrderedBinnedIterator
// 6 January 2007 -- tds
8 #include "BinOrderedBinnedIterator.hpp"
9 #include "BinnedFeatureIndex.hpp"
10 #include "UncompressedRead.hpp"
11 #include "lemur/RVLCompress.hpp"
12 #include "indri/RVLDecompressStream.hpp"
15 // BinOrderedBinnedIterator constructor
//
// data  -- pointer supplied by the caller. NOTE(review): presumably stored in
//          the invertedData member that _readTermHeader reads from; the
//          member initializer list is not visible here -- confirm.
// start -- forwarded to _readTermHeader as startPosition.
// end   -- forwarded to _readTermHeader as endPosition.
18 BinOrderedBinnedIterator::BinOrderedBinnedIterator( const char* data
, UINT64 start
, UINT64 end
)
// Parse the term header right away so the bin table is available to the
// accessors below.
22 _readTermHeader( start
, end
);
29 int BinOrderedBinnedIterator::bin() {
30 return _currentBinInfo().bin
;
// next
//
// Returns a bool. The visible part of the body checks whether _termBinIndex
// has reached _termBins.size(), i.e. whether the index is one past the last
// entry of the bin table.
// NOTE(review): presumably this advances to the following bin and reports
// whether one exists -- the rest of the body is not visible here; confirm.
37 bool BinOrderedBinnedIterator::next() {
39 if (_termBinIndex
== _termBins
.size()) {
// reset
//
// NOTE(review): presumably rewinds the iterator so that iteration over
// _termBins can start again -- the body is not visible here; confirm.
51 void BinOrderedBinnedIterator::reset() {
59 const BinOrderedBinnedIterator::BinInfo
& BinOrderedBinnedIterator::_currentBinInfo() const {
60 return _termBins
[_termBinIndex
];
67 const char* BinOrderedBinnedIterator::binData() {
68 const BinInfo
& info
= _currentBinInfo();
69 return invertedData
+ info
.dataStart
;
76 UINT64
BinOrderedBinnedIterator::binDataLength() {
77 const BinInfo
& info
= _currentBinInfo();
78 return info
.dataLength
;
85 const char* BinOrderedBinnedIterator::skipData() {
86 const BinInfo
& info
= _currentBinInfo();
87 return invertedData
+ info
.skipStart
;
94 UINT64
BinOrderedBinnedIterator::skipDataLength() {
95 const BinInfo
& info
= _currentBinInfo();
96 return info
.skipLength
;
100 // postingsDataLength
103 UINT64
BinOrderedBinnedIterator::postingsDataLength() const {
104 return _postingsLength
;
111 UINT64
BinOrderedBinnedIterator::documentCount() const {
112 return _documentCount
;
// _readTermHeader
//
// Parses the term header located at invertedData + startPosition and appends
// one BinInfo record to _termBins for every entry of the header's bin table.
// Also sets _documentCount and adds each bin's dataLength to _postingsLength.
//
// startPosition -- offset of the term header within invertedData.
// endPosition   -- offset the header plus all bins are expected to end at;
//                  only checked by the assert at the bottom.
//
// NOTE(review): the declarations of headerLength, options, termBinLength and
// info are not visible here, nor is any initialization of _postingsLength
// (which is only ever += 'd below) -- confirm against the full source.
119 void BinOrderedBinnedIterator::_readTermHeader( UINT64 startPosition
, UINT64 endPosition
) {
120 const char* headerStart
= invertedData
+ startPosition
;
121 const char* headerNext
;
// Three leading fields are decoded in order: headerLength, options and
// _documentCount. Each call's return value becomes the read position for the
// next call.
125 headerNext
= lemur::utility::RVLCompress::decompress_int( headerStart
, headerLength
);
126 headerNext
= lemur::utility::RVLCompress::decompress_int( headerNext
, options
);
127 headerNext
= lemur::utility::RVLCompress::decompress_longlong( headerNext
, _documentCount
);
// Bit 0 of options selects whether each bin table entry carries a skip length.
128 bool useSkips
= ((options
& 0x01) != 0);
// The stream is built with headerLength as decoded above, i.e. before the
// adjustment on the next statement.
// NOTE(review): presumably headerLength is the byte size of the bin table
// that follows the three leading fields -- confirm against the writer.
130 indri::utility::RVLDecompressStream
stream( headerNext
, headerLength
);
// Add the bytes consumed by the three leading fields, so that
// startPosition + headerLength is the offset used for the first bin below.
131 headerLength
+= (headerNext
- headerStart
);
// Running total of binLength over the bins recorded so far.
132 UINT64 totalBinLength
= 0;
// One iteration per bin table entry, until the stream reports done().
135 while( !stream
.done() ) {
// Stays 0 when the header has no skip information.
137 int termSkipLength
= 0;
141 if( useSkips
) stream
>> termSkipLength
;
142 stream
>> termBinLength
;
// Layout recorded for each bin: it starts right after the previous bin, its
// skip section comes first (skipStart == binStart) and its data section
// follows at skipStart + skipLength.
147 info
.binStart
= startPosition
+ headerLength
+ totalBinLength
;
148 info
.skipStart
= info
.binStart
;
149 info
.skipLength
= termSkipLength
;
150 info
.dataStart
= info
.skipStart
+ info
.skipLength
;
151 info
.dataLength
= termBinLength
;
152 info
.binLength
= info
.skipLength
+ info
.dataLength
;
154 _termBins
.push_back(info
);
155 totalBinLength
+= info
.binLength
;
// Only the data portion counts toward _postingsLength; skip bytes are excluded.
156 _postingsLength
+= info
.dataLength
;
// Consistency check: header plus all bins must end exactly at endPosition.
// This is an assert, so it is compiled out when NDEBUG is defined.
159 assert( startPosition
+ headerLength
+ totalBinLength
== endPosition
);
164 // _word_block_end_offset
//
// File-local helper. Reads the 16-bit value stored at wordBlockEnds[index-1]
// via UncompressedRead::peek_u16 and returns it.
//
// wordBlockEnds -- array of UINT16 values; its address is handed to peek_u16
//                  as a byte pointer rather than being dereferenced directly.
// index         -- the element read is index-1.
//
// NOTE(review): index == 0 would address wordBlockEnds[-1]; any guard for
// that case is not visible here -- confirm against the full source.
167 static UINT16
_word_block_end_offset( const UINT16
* wordBlockEnds
, int index
) {
171 return UncompressedRead::peek_u16( (const UINT8
*) (&wordBlockEnds
[index
-1]) );