Initial import into git.
[galago.git] / cpp / galago / include / WideAccumulator.hpp
blob537653f8cafb9949c576f062360468c374234906
2 //
3 // WideAccumulator
4 //
5 // 23 February 2007 -- tds
6 //
8 #ifndef GALAGO_WIDEACCUMULATOR_HPP
9 #define GALAGO_WIDEACCUMULATOR_HPP
// Number of term bits a WideAccumulator can track; this is the value
// returned by WideAccumulator::maxTerms().
11 #define WA_TERM_BITS (64)
// Value returned by WideAccumulator::maxScore(). MAX_INT32 is not defined
// in this header.
12 #define WA_TERM_MAX_SCORE (MAX_INT32)
14 class WideAccumulator {
15 private:
16 UINT32 _document;
17 UINT32 _score;
18 UINT64 _terms;
20 public:
21 typedef WideAccumulator update_type;
22 typedef UINT64 term_type;
24 WideAccumulator() :
25 _document(0), _score(0), _terms(0)
29 WideAccumulator( const UINT32 document, const UINT32 terms, const UINT64 score ) {
30 _document = document;
31 _terms = terms;
32 _score = score;
35 WideAccumulator( const UINT32 document, const update_type update ) {
36 _document = document;
37 _terms = update.terms();
38 _score = update.score();
41 bool operator< ( const WideAccumulator& other ) const {
42 return score() < other.score();
45 inline UINT32 document() const {
46 return _document;
49 inline UINT32 score() const {
50 return _score;
53 inline UINT32 terms() const {
54 return _terms;
57 inline void update( const update_type update ) {
58 _terms |= update.terms();
59 _score += update.score();
62 inline UINT32 unseen( const std::vector<UINT32>& fullUnseen ) const {
63 UINT32 result = 0;
65 for( int b=0; b<8; b++ ) {
66 UINT32 bits = (_terms >> (b*8)) & 0xFF;
67 result += fullUnseen[ (b<<8) | bits ];
70 return result;
73 inline bool containsTerm( const term_type termBit ) const {
74 return (termBit & _terms) ? true : false;
77 static int maxScore() {
78 return WA_TERM_MAX_SCORE;
81 static int maxTerms() {
82 return WA_TERM_BITS;
85 static term_type buildTerm( const int termIndex ) {
86 return (1ULL<<termIndex);
89 static term_type buildNullTerm() {
90 return 0;
93 static update_type buildUpdate( const int termIndex, const UINT32 score ) {
94 WideAccumulator result;
95 result._terms = buildTerm( termIndex );
96 result._score = score;
97 return result;
100 static void computeUnseenScoreArray( std::vector<UINT32>& fullUnseen, const std::vector<UINT32>& maxUnseen ) {
101 // fullUnseen is a precomputed array that helps make our job easier
102 // when computing unseen statistics.
103 // For WideAccumulators, we use 2048 (256*8) entries.
104 // The low 8-bits of the index into this array is a bitmap. The upper 3 bits
105 // are a byte index. This code probably makes more sense if you look at the unseen method too.
107 fullUnseen.resize( 2048, 0 );
109 // b selects one byte from the terms bitmap
110 for( UINT32 b = 0; b < 8; b++ ) {
111 // bits iterates over all possible settings of 8 bits
112 for( UINT32 bits = 0; bits < 256; bits++ ) {
113 fullUnseen[ (b<<8) | bits ] = 0;
115 for( UINT32 bitIndex = 0; bitIndex < 8; bitIndex++ ) {
116 UINT32 bit = 1<<bitIndex;
117 UINT32 term = bitIndex + b*8;
119 if( (bits & bit) == 0 && term < maxUnseen.size() ) {
120 fullUnseen[ (b<<8) | bits ] += maxUnseen[ term ];
129 template<>
130 class AccumulatorTypes<WideAccumulator> {
131 public:
132 typedef WideAccumulator update_type;
133 typedef UINT64 term_type;
136 #endif // GALAGO_WIDEACCUMULATOR_HPP