5 // 23 February 2007 -- tds
8 #ifndef GALAGO_WIDEACCUMULATOR_HPP
9 #define GALAGO_WIDEACCUMULATOR_HPP
11 #define WA_TERM_BITS (64)
12 #define WA_TERM_MAX_SCORE (MAX_INT32)
14 class WideAccumulator
{
21 typedef WideAccumulator update_type
;
22 typedef UINT64 term_type
;
25 _document(0), _score(0), _terms(0)
29 WideAccumulator( const UINT32 document
, const UINT32 terms
, const UINT64 score
) {
35 WideAccumulator( const UINT32 document
, const update_type update
) {
37 _terms
= update
.terms();
38 _score
= update
.score();
41 bool operator< ( const WideAccumulator
& other
) const {
42 return score() < other
.score();
45 inline UINT32
document() const {
49 inline UINT32
score() const {
53 inline UINT32
terms() const {
57 inline void update( const update_type update
) {
58 _terms
|= update
.terms();
59 _score
+= update
.score();
62 inline UINT32
unseen( const std::vector
<UINT32
>& fullUnseen
) const {
65 for( int b
=0; b
<8; b
++ ) {
66 UINT32 bits
= (_terms
>> (b
*8)) & 0xFF;
67 result
+= fullUnseen
[ (b
<<8) | bits
];
73 inline bool containsTerm( const term_type termBit
) const {
74 return (termBit
& _terms
) ? true : false;
77 static int maxScore() {
78 return WA_TERM_MAX_SCORE
;
81 static int maxTerms() {
85 static term_type
buildTerm( const int termIndex
) {
86 return (1ULL<<termIndex
);
89 static term_type
buildNullTerm() {
93 static update_type
buildUpdate( const int termIndex
, const UINT32 score
) {
94 WideAccumulator result
;
95 result
._terms
= buildTerm( termIndex
);
96 result
._score
= score
;
100 static void computeUnseenScoreArray( std::vector
<UINT32
>& fullUnseen
, const std::vector
<UINT32
>& maxUnseen
) {
101 // fullUnseen is a precomputed array that helps make our job easier
102 // when computing unseen statistics.
103 // For WideAccumulators, we use 2048 (256*8) entries.
104 // The low 8-bits of the index into this array is a bitmap. The upper 3 bits
105 // are a byte index. This code probably makes more sense if you look at the unseen method too.
107 fullUnseen
.resize( 2048, 0 );
109 // b selects one byte from the terms bitmap
110 for( UINT32 b
= 0; b
< 8; b
++ ) {
111 // bits iterates over all possible settings of 8 bits
112 for( UINT32 bits
= 0; bits
< 256; bits
++ ) {
113 fullUnseen
[ (b
<<8) | bits
] = 0;
115 for( UINT32 bitIndex
= 0; bitIndex
< 8; bitIndex
++ ) {
116 UINT32 bit
= 1<<bitIndex
;
117 UINT32 term
= bitIndex
+ b
*8;
119 if( (bits
& bit
) == 0 && term
< maxUnseen
.size() ) {
120 fullUnseen
[ (b
<<8) | bits
] += maxUnseen
[ term
];
130 class AccumulatorTypes
<WideAccumulator
> {
132 typedef WideAccumulator update_type
;
133 typedef UINT64 term_type
;
#endif // GALAGO_WIDEACCUMULATOR_HPP