Initial import into git.
[galago.git] / cpp / galago / src / DocumentOrderedBinnedRetrieval.cpp
blobbfcf565348940e4be3932144ebe4590a02d4b7a8
2 //
3 // DocumentOrderedBinnedRetrieval
4 //
5 // 15 October 2007 -- tds
6 //
8 #include "DocumentOrderedBinnedRetrieval.hpp"
9 #include "indri/ScoredExtentResult.hpp"
10 #include "Query.hpp"
11 #include <vector>
12 #include <queue>
13 #include <assert.h>
14 #include "lemur/Exception.hpp"
15 #include "Logging.hpp"
18 // openRead
21 void DocumentOrderedBinnedRetrieval::openRead( const std::string& indexPath ) {
22 _index.openRead( indexPath );
26 // runQuery
29 std::vector<indri::api::ScoredExtentResult>
30 DocumentOrderedBinnedRetrieval::runQuery( const std::vector<QueryTerm>& terms,
31 int requested,
32 int threshold ) {
33 std::priority_queue<indri::api::ScoredExtentResult> results;
35 std::vector<DocumentOrderedBinnedIterator*> iterators = getIterators( terms );
36 galago_log_query_terms( iterators.size() );
38 while (true) {
39 // determine which document to score
40 int document = MAX_INT32;
42 for( int i=0; i<iterators.size(); i++ ) {
43 DocumentOrderedBinnedIterator* iterator = iterators[i];
44 if( iterator->isDone() )
45 continue;
47 document = std::min( document, iterator->currentDocument() );
50 if( document == MAX_INT32 )
51 break;
53 // move all iterators to the document and score
54 int score = 0;
56 for( int i=0; i<iterators.size(); i++ ) {
57 DocumentOrderedBinnedIterator* iterator = iterators[i];
59 iterator->skipToDocument( document );
60 if( iterator->isDone() || iterator->currentDocument() != document )
61 continue;
63 score += iterator->currentScore();
66 // store this result
67 results.push( indri::api::ScoredExtentResult( score, document, 0, 0 ) );
69 while( results.size() > requested )
70 results.pop();
72 // move the matching iterators forward
73 for( int i=0; i<iterators.size(); i++ ) {
74 DocumentOrderedBinnedIterator* iterator = iterators[i];
76 if( !iterator->isDone() && iterator->currentDocument() == document )
77 iterator->nextDocument();
81 // convert priority queue into real results
82 std::vector<indri::api::ScoredExtentResult> vectorResults;
84 while( results.size() > 0 ) {
85 vectorResults.push_back( results.top() );
86 results.pop();
89 std::sort( vectorResults.begin(), vectorResults.end() );
90 return vectorResults;
94 // getIterators
97 std::vector<DocumentOrderedBinnedIterator*>
98 DocumentOrderedBinnedRetrieval::getIterators( const std::vector<QueryTerm>& terms ) {
99 std::vector<DocumentOrderedBinnedIterator*> iterators;
101 for( int i=0; i<terms.size(); i++ ) {
102 const QueryTerm& term = terms[i];
103 DocumentOrderedBinnedIterator* iterator = _index.getTerm( term.text, term.field );
105 if( iterator )
106 iterators.push_back( iterator );
109 return iterators;
113 // getDocument
116 std::string DocumentOrderedBinnedRetrieval::getDocument( int doc ) {
117 return _index.getDocument(doc);
121 // close
124 void DocumentOrderedBinnedRetrieval::close() {
125 _index.close();