/** @file
 * @brief internal class representing a term in a modified document
 */
/* Copyright 1999,2000,2001 BrightStation PLC
 * Copyright 2002 Ananova Ltd
 * Copyright 2003,2007,2018 Olly Betts
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
 */
24 #ifndef OM_HGUARD_DOCUMENTTERM_H
25 #define OM_HGUARD_DOCUMENTTERM_H
32 #include <xapian/types.h>
36 /// A term in a document.
37 class OmDocumentTerm
{
/** Make a new term.
 *
 * @param wdf_ Initial wdf.
 */
43 explicit OmDocumentTerm(Xapian::termcount wdf_
)
46 LOGCALL_CTOR(DB
, "OmDocumentTerm", wdf_
);
49 /** Within document frequency of the term.
50 * This is the number of occurrences of the term in the document.
52 Xapian::termcount wdf
;
54 /** Split point in the position range.
56 * To allow more efficient insertion of positions, we support the
57 * positions being split into two sorted ranges, and if this is the
58 * case, split will be > 0 and there will be two sorted ranges [0, split)
59 * and [split, positions.size()).
61 * If split is 0, then [0, positions.size()) form a single sorted range.
63 * If positions.empty(), then split > 0 indicates that the term has been
64 * deleted (this allows us to delete terms without invalidating existing
65 * TermIterator objects).
67 * Use type unsigned here to avoid bloating this structure. More than
68 * 4 billion positions in one document is not sensible (and not possible
69 * unless termpos is configured to be 64 bit).
71 mutable unsigned split
= 0;
73 /** Merge sorted ranges before and after @a split. */
76 typedef vector
<Xapian::termpos
> term_positions
;
79 /** Positional information.
81 * This is a list of positions at which the term occurs in the
82 * document. The list is in strictly increasing order of term
85 * The positions start at 1.
87 * Note that, even if positional information is present, the WDF might
88 * not be equal to the length of the position list, since a term might
89 * occur multiple times at a single position, but will only have one
90 * entry in the position list for each position.
92 mutable term_positions positions
;
95 const term_positions
* get_vector_termpos() const {
100 Xapian::termcount
positionlist_count() const {
101 return positions
.size();
111 * If @a termpos is already present, this is a no-op.
113 * @param wdf_inc wdf increment
114 * @param termpos Position to add
116 * @return true if the term was flagged as deleted before the operation.
118 bool add_position(Xapian::termcount wdf_inc
, Xapian::termpos termpos
);
/** Append a position.
 *
 * The position must be >= the largest currently in the list.
 */
124 void append_position(Xapian::termpos termpos
) {
125 positions
.push_back(termpos
);
128 /** Remove an entry from the position list.
130 * This removes an entry from the list of positions.
132 * This does not change the value of the wdf.
134 * @exception Xapian::InvalidArgumentError is thrown if the position does
135 * not occur in the position list.
137 void remove_position(Xapian::termpos tpos
);
139 /** Remove a range of positions.
141 * @param termpos_first First position to remove
142 * @param termpos_last Last position to remove
144 * It's OK if there are no positions in the specified range.
146 * @return the number of positions removed.
148 Xapian::termpos
remove_positions(Xapian::termpos termpos_first
,
149 Xapian::termpos termpos_last
);
/** Increase within-document frequency.
 *
 * @return true if the term was flagged as deleted before the operation.
 */
155 bool increase_wdf(Xapian::termcount delta
) {
156 if (rare(is_deleted())) {
165 /// Decrease within-document frequency.
166 void decrease_wdf(Xapian::termcount delta
) {
167 // Saturating arithmetic - don't let the wdf go below zero.
176 Xapian::termcount
get_wdf() const { return wdf
; }
178 /** Has this term been deleted from this document?
180 * We flag entries as deleted instead of actually deleting them to avoid
181 * invalidating existing TermIterator objects.
183 bool is_deleted() const { return positions
.empty() && split
> 0; }
185 /// Return a string describing this object.
186 string
get_description() const;
189 #endif // OM_HGUARD_DOCUMENTTERM_H