Reimplement Language Modelling weights
[xapian.git] / xapian-core / common / bitstream.h
blob9e7166b4e39bed046e290d5910d5b7d2096e7127
1 /** @file
2 * @brief Classes to encode/decode a bitstream.
3 */
4 /* Copyright (C) 2004,2005,2006,2008,2012,2013,2014,2017,2018 Olly Betts
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2 of the
9 * License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
19 * USA
20 */
22 #ifndef XAPIAN_INCLUDED_BITSTREAM_H
23 #define XAPIAN_INCLUDED_BITSTREAM_H
25 #include <xapian/types.h>
27 #include "api/smallvector.h"
29 #include <string>
30 #include <vector>
32 namespace Xapian {
34 /// Create a stream to which non-byte-aligned values can be written.
35 class BitWriter {
36 std::string buf;
37 int n_bits = 0;
38 Xapian::termpos acc = 0;
40 public:
41 /// Construct empty.
42 BitWriter() { }
44 /// Construct with the contents of seed already in the stream.
45 explicit BitWriter(const std::string& seed)
46 : buf(seed) { }
48 /// Encode value, known to be less than outof.
49 void encode(Xapian::termpos value, Xapian::termpos outof);
51 /// Finish encoding and return the encoded data as a std::string.
52 std::string& freeze() {
53 if (n_bits) {
54 buf += char(acc);
55 n_bits = 0;
56 acc = 0;
58 return buf;
61 /// Perform interpolative encoding of pos elements between j and k.
62 void encode_interpolative(const Xapian::VecCOW<Xapian::termpos>& pos,
63 int j, int k);
66 /// Read a stream created by BitWriter.
67 class BitReader {
68 const char* p;
70 const char* end;
72 int n_bits;
74 Xapian::termpos acc;
76 Xapian::termpos read_bits(int count);
78 struct DIStack {
79 int j, k;
80 Xapian::termpos pos_k;
83 struct DIState : public DIStack {
84 Xapian::termpos pos_j;
86 void set_j(int j_, Xapian::termpos pos_j_) {
87 j = j_;
88 pos_j = pos_j_;
90 void set_k(int k_, Xapian::termpos pos_k_) {
91 k = k_;
92 pos_k = pos_k_;
94 void uninit() {
95 j = 1;
96 k = 0;
98 DIState() { uninit(); }
99 DIState(int j_, int k_,
100 Xapian::termpos pos_j_, Xapian::termpos pos_k_) {
101 set_j(j_, pos_j_);
102 set_k(k_, pos_k_);
104 void operator=(const DIStack& o) {
105 j = o.j;
106 set_k(o.k, o.pos_k);
108 bool is_next() const { return j + 1 < k; }
109 bool is_initialized() const {
110 return j <= k;
112 // Given pos[j] = pos_j and pos[k] = pos_k, how many possible position
113 // values are there for the value midway between?
114 Xapian::termpos outof() const {
115 return pos_k - pos_j - Xapian::termpos(k - j) + 1;
119 std::vector<DIStack> di_stack;
120 DIState di_current;
122 public:
123 // Construct.
124 BitReader() { }
126 // Construct and set data.
127 BitReader(const char* p_, const char* end_)
128 : p(p_), end(end_), n_bits(0), acc(0) { }
130 // Initialise with fresh data.
131 void init(const char* p_, const char* end_) {
132 p = p_;
133 end = end_;
134 n_bits = 0;
135 acc = 0;
136 di_stack.clear();
137 di_current.uninit();
140 // Decode value, known to be less than outof.
141 Xapian::termpos decode(Xapian::termpos outof, bool force = false);
143 // Check all the data has been read. Because it'll be zero padded
144 // to fill a byte, the best we can actually do is check that
145 // there's less than a byte left and that all remaining bits are
146 // zero.
147 bool check_all_gone() const {
148 return (p == end && n_bits <= 7 && acc == 0);
151 /// Perform interpolative decoding between elements between j and k.
152 void decode_interpolative(int j, int k,
153 Xapian::termpos pos_j, Xapian::termpos pos_k);
155 /// Perform on-demand interpolative decoding.
156 Xapian::termpos decode_interpolative_next();
161 using Xapian::BitWriter;
162 using Xapian::BitReader;
164 #endif // XAPIAN_INCLUDED_BITSTREAM_H