Initial import into git.
[galago.git] / cpp / galago / src / BinOrderedBinnedIterator.cpp
blob6dfe7f0f0d6e736b43e09a713011b13c28128f64
2 //
// BinOrderedBinnedIterator
4 //
5 // 6 January 2007 -- tds
6 //
8 #include "BinOrderedBinnedIterator.hpp"
9 #include "BinnedFeatureIndex.hpp"
10 #include "UncompressedRead.hpp"
11 #include "lemur/RVLCompress.hpp"
12 #include "indri/RVLDecompressStream.hpp"
// BinOrderedBinnedIterator constructor
18 BinOrderedBinnedIterator::BinOrderedBinnedIterator( const char* data, UINT64 start, UINT64 end )
20 invertedData(data)
22 _readTermHeader( start, end );
26 // bin
29 int BinOrderedBinnedIterator::bin() {
30 return _currentBinInfo().bin;
34 // next
37 bool BinOrderedBinnedIterator::next() {
38 _termBinIndex++;
39 if (_termBinIndex == _termBins.size()) {
40 _termBinIndex--;
41 return false;
44 return true;
48 // reset
51 void BinOrderedBinnedIterator::reset() {
52 _termBinIndex = -1;
56 // _currentBinInfo
59 const BinOrderedBinnedIterator::BinInfo& BinOrderedBinnedIterator::_currentBinInfo() const {
60 return _termBins[_termBinIndex];
64 // binData
67 const char* BinOrderedBinnedIterator::binData() {
68 const BinInfo& info = _currentBinInfo();
69 return invertedData + info.dataStart;
// binDataLength
76 UINT64 BinOrderedBinnedIterator::binDataLength() {
77 const BinInfo& info = _currentBinInfo();
78 return info.dataLength;
82 // skipData
85 const char* BinOrderedBinnedIterator::skipData() {
86 const BinInfo& info = _currentBinInfo();
87 return invertedData + info.skipStart;
91 // skipDataLength
94 UINT64 BinOrderedBinnedIterator::skipDataLength() {
95 const BinInfo& info = _currentBinInfo();
96 return info.skipLength;
100 // postingsDataLength
103 UINT64 BinOrderedBinnedIterator::postingsDataLength() const {
104 return _postingsLength;
108 // documentCount
111 UINT64 BinOrderedBinnedIterator::documentCount() const {
112 return _documentCount;
116 // _readTermHeader
119 void BinOrderedBinnedIterator::_readTermHeader( UINT64 startPosition, UINT64 endPosition ) {
120 const char* headerStart = invertedData + startPosition;
121 const char* headerNext;
122 int headerLength;
123 int options;
125 headerNext = lemur::utility::RVLCompress::decompress_int( headerStart, headerLength );
126 headerNext = lemur::utility::RVLCompress::decompress_int( headerNext, options );
127 headerNext = lemur::utility::RVLCompress::decompress_longlong( headerNext, _documentCount );
128 bool useSkips = ((options & 0x01) != 0);
130 indri::utility::RVLDecompressStream stream( headerNext, headerLength );
131 headerLength += (headerNext - headerStart);
132 UINT64 totalBinLength = 0;
133 _postingsLength = 0;
135 while( !stream.done() ) {
136 int termBin;
137 int termSkipLength = 0;
138 int termBinLength;
140 stream >> termBin;
141 if( useSkips ) stream >> termSkipLength;
142 stream >> termBinLength;
144 BinInfo info;
146 info.bin = termBin;
147 info.binStart = startPosition + headerLength + totalBinLength;
148 info.skipStart = info.binStart;
149 info.skipLength = termSkipLength;
150 info.dataStart = info.skipStart + info.skipLength;
151 info.dataLength = termBinLength;
152 info.binLength = info.skipLength + info.dataLength;
154 _termBins.push_back(info);
155 totalBinLength += info.binLength;
156 _postingsLength += info.dataLength;
159 assert( startPosition + headerLength + totalBinLength == endPosition );
160 _termBinIndex = -1;
164 // _word_block_end_offset
167 static UINT16 _word_block_end_offset( const UINT16* wordBlockEnds, int index ) {
168 if ( index == 0 ) {
169 return 0;
170 } else {
171 return UncompressedRead::peek_u16( (const UINT8*) (&wordBlockEnds[index-1]) );