Refactor to avoid warning with GCC 12.2
[xapian.git] / xapian-core / api / documentterm.h
blobd9d8066e61fdc06d19d65ca4310cbb293b21b77b
/** @file
 * @brief internal class representing a term in a modified document
 */
/* Copyright 1999,2000,2001 BrightStation PLC
 * Copyright 2002 Ananova Ltd
 * Copyright 2003,2007,2018 Olly Betts
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
 * USA
 */
24 #ifndef OM_HGUARD_DOCUMENTTERM_H
25 #define OM_HGUARD_DOCUMENTTERM_H
27 #include "debuglog.h"
29 #include <string>
30 #include <vector>
32 #include <xapian/types.h>
34 using namespace std;
36 /// A term in a document.
37 class OmDocumentTerm {
38 public:
39 /** Make a new term.
41 * @param wdf_ Initial wdf.
43 explicit OmDocumentTerm(Xapian::termcount wdf_)
44 : wdf(wdf_)
46 LOGCALL_CTOR(DB, "OmDocumentTerm", wdf_);
49 /** Within document frequency of the term.
50 * This is the number of occurrences of the term in the document.
52 Xapian::termcount wdf;
54 /** Split point in the position range.
56 * To allow more efficient insertion of positions, we support the
57 * positions being split into two sorted ranges, and if this is the
58 * case, split will be > 0 and there will be two sorted ranges [0, split)
59 * and [split, positions.size()).
61 * If split is 0, then [0, positions.size()) form a single sorted range.
63 * If positions.empty(), then split > 0 indicates that the term has been
64 * deleted (this allows us to delete terms without invalidating existing
65 * TermIterator objects).
67 * Use type unsigned here to avoid bloating this structure. More than
68 * 4 billion positions in one document is not sensible (and not possible
69 * unless termpos is configured to be 64 bit).
71 mutable unsigned split = 0;
73 /** Merge sorted ranges before and after @a split. */
74 void merge() const;
76 typedef vector<Xapian::termpos> term_positions;
78 private:
79 /** Positional information.
81 * This is a list of positions at which the term occurs in the
82 * document. The list is in strictly increasing order of term
83 * position.
85 * The positions start at 1.
87 * Note that, even if positional information is present, the WDF might
88 * not be equal to the length of the position list, since a term might
89 * occur multiple times at a single position, but will only have one
90 * entry in the position list for each position.
92 mutable term_positions positions;
94 public:
95 const term_positions* get_vector_termpos() const {
96 merge();
97 return &positions;
100 Xapian::termcount positionlist_count() const {
101 return positions.size();
104 void remove() {
105 positions.clear();
106 split = 1;
109 /** Add a position.
111 * If @a termpos is already present, this is a no-op.
113 * @param wdf_inc wdf increment
114 * @param termpos Position to add
116 * @return true if the term was flagged as deleted before the operation.
118 bool add_position(Xapian::termcount wdf_inc, Xapian::termpos termpos);
120 /** Append a position.
122 * The position must be >= the largest currently in the list.
124 void append_position(Xapian::termpos termpos) {
125 positions.push_back(termpos);
128 /** Remove an entry from the position list.
130 * This removes an entry from the list of positions.
132 * This does not change the value of the wdf.
134 * @exception Xapian::InvalidArgumentError is thrown if the position does
135 * not occur in the position list.
137 void remove_position(Xapian::termpos tpos);
139 /** Remove a range of positions.
141 * @param termpos_first First position to remove
142 * @param termpos_last Last position to remove
144 * It's OK if there are no positions in the specified range.
146 * @return the number of positions removed.
148 Xapian::termpos remove_positions(Xapian::termpos termpos_first,
149 Xapian::termpos termpos_last);
151 /** Increase within-document frequency.
153 * @return true if the term was flagged as deleted before the operation.
155 bool increase_wdf(Xapian::termcount delta) {
156 if (rare(is_deleted())) {
157 split = 0;
158 wdf = delta;
159 return true;
161 wdf += delta;
162 return false;
165 /// Decrease within-document frequency.
166 void decrease_wdf(Xapian::termcount delta) {
167 // Saturating arithmetic - don't let the wdf go below zero.
168 if (wdf >= delta) {
169 wdf -= delta;
170 } else {
171 wdf = 0;
175 /// Get the wdf
176 Xapian::termcount get_wdf() const { return wdf; }
178 /** Has this term been deleted from this document?
180 * We flag entries as deleted instead of actually deleting them to avoid
181 * invalidating existing TermIterator objects.
183 bool is_deleted() const { return positions.empty() && split > 0; }
185 /// Return a string describing this object.
186 string get_description() const;
189 #endif // OM_HGUARD_DOCUMENTTERM_H