Refactor to avoid warning with GCC 12.2
[xapian.git] / xapian-core / api / omdocument.cc
blob8664ec4cd8cd42f1581a9086688d583ffcf42764
/* omdocument.cc: implementation of Xapian::Document and its internals
 *
 * Copyright 1999,2000,2001 BrightStation PLC
 * Copyright 2002 Ananova Ltd
 * Copyright 2003,2004,2006,2007,2008,2009,2011,2013,2014,2018 Olly Betts
 * Copyright 2009 Lemur Consulting Ltd
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */
24 #include <config.h>
26 #include <xapian/document.h>
28 #include "backends/document.h"
29 #include "documentvaluelist.h"
30 #include "maptermlist.h"
31 #include "net/serialise.h"
32 #include "overflow.h"
33 #include "str.h"
34 #include "unicode/description_append.h"
36 #include <xapian/error.h>
37 #include <xapian/types.h>
38 #include <xapian/valueiterator.h>
40 #include <algorithm>
41 #include <limits>
42 #include <string>
44 using namespace std;
46 namespace Xapian {
48 // implementation of Document
50 Document::Document(Document::Internal *internal_) : internal(internal_)
54 Document::Document(Document&&) = default;
56 Document&
57 Document::operator=(Document&&) = default;
59 Document::Document() : internal(new Xapian::Document::Internal)
63 string
64 Document::get_value(Xapian::valueno slot) const
66 LOGCALL(API, string, "Document::get_value", slot);
67 RETURN(internal->get_value(slot));
70 string
71 Document::get_data() const
73 LOGCALL(API, string, "Document::get_data", NO_ARGS);
74 RETURN(internal->get_data());
77 void
78 Document::set_data(const string &data)
80 LOGCALL_VOID(API, "Document::set_data", data);
81 internal->set_data(data);
84 void
85 Document::operator=(const Document &other)
87 // pointers are reference counted.
88 internal = other.internal;
91 Document::Document(const Document &other)
92 : internal(other.internal)
96 Document::~Document()
100 string
101 Document::get_description() const
103 return internal->get_description();
106 void
107 Document::add_value(Xapian::valueno slot, const string &value)
109 LOGCALL_VOID(API, "Document::add_value", slot | value);
110 internal->add_value(slot, value);
113 void
114 Document::remove_value(Xapian::valueno slot)
116 LOGCALL_VOID(API, "Document::remove_value", slot);
117 internal->remove_value(slot);
120 void
121 Document::clear_values()
123 LOGCALL_VOID(API, "Document::clear_values", NO_ARGS);
124 internal->clear_values();
127 void
128 Document::add_posting(const string & tname,
129 Xapian::termpos tpos,
130 Xapian::termcount wdfinc)
132 LOGCALL_VOID(API, "Document::add_posting", tname | tpos | wdfinc);
133 if (tname.empty()) {
134 throw InvalidArgumentError("Empty termnames aren't allowed.");
136 internal->add_posting(tname, tpos, wdfinc);
139 void
140 Document::add_term(const string & tname, Xapian::termcount wdfinc)
142 LOGCALL_VOID(API, "Document::add_term", tname | wdfinc);
143 if (tname.empty()) {
144 throw InvalidArgumentError("Empty termnames aren't allowed.");
146 internal->add_term(tname, wdfinc);
149 void
150 Document::remove_posting(const string & tname, Xapian::termpos tpos,
151 Xapian::termcount wdfdec)
153 LOGCALL_VOID(API, "Document::remove_posting", tname | tpos | wdfdec);
154 if (tname.empty()) {
155 throw InvalidArgumentError("Empty termnames aren't allowed.");
157 internal->remove_posting(tname, tpos, wdfdec);
160 Xapian::termpos
161 Document::remove_postings(const string& term,
162 Xapian::termpos termpos_first,
163 Xapian::termpos termpos_last,
164 Xapian::termcount wdf_dec)
166 if (term.empty()) {
167 throw InvalidArgumentError("Empty termnames aren't allowed.");
169 if (rare(termpos_first > termpos_last)) {
170 return 0;
172 return internal->remove_postings(term, termpos_first, termpos_last,
173 wdf_dec);
176 void
177 Document::remove_term(const string & tname)
179 LOGCALL_VOID(API, "Document::remove_term", tname);
180 internal->remove_term(tname);
183 void
184 Document::clear_terms()
186 LOGCALL_VOID(API, "Document::clear_terms", NO_ARGS);
187 internal->clear_terms();
190 Xapian::termcount
191 Document::termlist_count() const {
192 LOGCALL(API, Xapian::termcount, "Document::termlist_count", NO_ARGS);
193 RETURN(internal->termlist_count());
196 TermIterator
197 Document::termlist_begin() const
199 LOGCALL(API, TermIterator, "Document::termlist_begin", NO_ARGS);
200 RETURN(TermIterator(internal->open_term_list()));
203 Xapian::termcount
204 Document::values_count() const {
205 LOGCALL(API, Xapian::termcount, "Document::values_count", NO_ARGS);
206 RETURN(internal->values_count());
209 ValueIterator
210 Document::values_begin() const
212 LOGCALL(API, ValueIterator, "Document::values_begin", NO_ARGS);
213 // Calling values_count() has the side effect of making sure that they have
214 // been read into the std::map "values" member of internal.
215 if (internal->values_count() == 0) RETURN(ValueIterator());
216 RETURN(ValueIterator(new DocumentValueList(internal)));
219 docid
220 Document::get_docid() const
222 LOGCALL(API, docid, "Document::get_docid", NO_ARGS);
223 RETURN(internal->get_docid());
226 std::string
227 Document::serialise() const
229 LOGCALL(API, std::string, "Document::serialise", NO_ARGS);
230 RETURN(serialise_document(*this));
233 Document
234 Document::unserialise(const std::string &s)
236 LOGCALL_STATIC(API, Document, "Document::unserialise", s);
237 RETURN(unserialise_document(s));
242 /////////////////////////////////////////////////////////////////////////////
244 void
245 OmDocumentTerm::merge() const
247 Assert(!is_deleted());
248 inplace_merge(positions.begin(),
249 positions.begin() + split,
250 positions.end());
251 split = 0;
254 bool
255 OmDocumentTerm::add_position(Xapian::termcount wdf_inc, Xapian::termpos tpos)
257 LOGCALL(DB, bool, "OmDocumentTerm::add_position", wdf_inc | tpos);
259 if (rare(is_deleted())) {
260 wdf = wdf_inc;
261 split = 0;
262 positions.push_back(tpos);
263 return true;
266 wdf += wdf_inc;
268 // Optimise the common case of adding positions in ascending order.
269 if (positions.empty()) {
270 positions.push_back(tpos);
271 return false;
273 if (tpos > positions.back()) {
274 if (split) {
275 // Check for duplicate before split.
276 auto i = lower_bound(positions.cbegin(),
277 positions.cbegin() + split,
278 tpos);
279 if (i != positions.cbegin() + split && *i == tpos)
280 return false;
282 positions.push_back(tpos);
283 return false;
286 if (tpos == positions.back()) {
287 // Duplicate of last entry.
288 return false;
291 if (split > 0) {
292 // We could merge in the new entry at the same time, but that seems to
293 // make things much more complex for minor gains.
294 merge();
297 // Search for the position the term occurs at. Use binary chop to
298 // search, since this is a sorted list.
299 vector<Xapian::termpos>::iterator i;
300 i = lower_bound(positions.begin(), positions.end(), tpos);
301 if (i == positions.end() || *i != tpos) {
302 auto new_split = positions.size();
303 if (sizeof(split) < sizeof(Xapian::termpos)) {
304 if (rare(new_split > numeric_limits<decltype(split)>::max())) {
305 // The split point would be beyond the size of the type used to
306 // hold it, which is really unlikely if that type is 32-bit.
307 // Just insert the old way in this case.
308 positions.insert(i, tpos);
309 return false;
311 } else {
312 // This assertion should always be true because we shouldn't have
313 // duplicate entries and the split point can't be after the final
314 // entry.
315 AssertRel(new_split, <=, numeric_limits<decltype(split)>::max());
317 split = new_split;
318 positions.push_back(tpos);
320 return false;
323 void
324 OmDocumentTerm::remove_position(Xapian::termpos tpos)
326 LOGCALL_VOID(DB, "OmDocumentTerm::remove_position", tpos);
328 Assert(!is_deleted());
330 if (rare(positions.empty())) {
331 not_there:
332 throw Xapian::InvalidArgumentError("Position " + str(tpos) +
333 " not in list, can't remove");
336 // Special case removing the final position, which we can handle in O(1).
337 if (positions.back() == tpos) {
338 positions.pop_back();
339 if (split == positions.size()) {
340 split = 0;
341 // We removed the only entry from after the split.
343 return;
346 if (split > 0) {
347 // We could remove the requested entry at the same time, but that seems
348 // fiddly to do.
349 merge();
352 // We keep positions sorted, so use lower_bound() which can binary chop to
353 // find the entry.
354 auto i = lower_bound(positions.begin(), positions.end(), tpos);
355 if (i == positions.end() || *i != tpos) {
356 goto not_there;
358 positions.erase(i);
361 Xapian::termpos
362 OmDocumentTerm::remove_positions(Xapian::termpos termpos_first,
363 Xapian::termpos termpos_last)
365 LOGCALL(DB, Xapian::termpos, "OmDocumentTerm::remove_position", termpos_first | termpos_last);
367 Assert(!is_deleted());
369 if (split > 0) {
370 // We could remove the requested entries at the same time, but that
371 // seems fiddly to do.
372 merge();
375 // Find the range [i, j) that the specified termpos range maps to. Use
376 // binary chop to search, since this is a sorted list.
377 auto i = lower_bound(positions.begin(), positions.end(), termpos_first);
378 if (i == positions.end() || *i > termpos_last) {
379 return 0;
381 auto j = upper_bound(i, positions.end(), termpos_last);
382 size_t size_before = positions.size();
383 positions.erase(i, j);
384 return Xapian::termpos(size_before - positions.size());
387 string
388 OmDocumentTerm::get_description() const
390 string description;
391 description = "OmDocumentTerm(wdf = ";
392 description += str(wdf);
393 description += ", positions[";
394 description += str(positions.size());
395 description += "])";
396 return description;
399 string
400 Xapian::Document::Internal::get_value(Xapian::valueno slot) const
402 if (values_here) {
403 map<Xapian::valueno, string>::const_iterator i;
404 i = values.find(slot);
405 if (i == values.end()) return string();
406 return i->second;
408 if (!database.get()) return string();
409 return do_get_value(slot);
412 string
413 Xapian::Document::Internal::get_data() const
415 LOGCALL(DB, string, "Xapian::Document::Internal::get_data", NO_ARGS);
416 if (data_here) RETURN(data);
417 if (!database.get()) RETURN(string());
418 RETURN(do_get_data());
421 void
422 Xapian::Document::Internal::set_data(const string &data_)
424 data = data_;
425 data_here = true;
428 TermList *
429 Xapian::Document::Internal::open_term_list() const
431 LOGCALL(DB, TermList *, "Document::Internal::open_term_list", NO_ARGS);
432 if (terms_here) {
433 RETURN(new MapTermList(terms.begin(), terms.end()));
435 if (!database.get()) RETURN(NULL);
436 RETURN(database->open_term_list(did));
439 void
440 Xapian::Document::Internal::add_value(Xapian::valueno slot, const string &value)
442 need_values();
443 if (!value.empty()) {
444 values[slot] = value;
445 } else {
446 // Empty values aren't stored, but replace any existing value by
447 // removing it.
448 values.erase(slot);
452 void
453 Xapian::Document::Internal::remove_value(Xapian::valueno slot)
455 need_values();
456 map<Xapian::valueno, string>::iterator i = values.find(slot);
457 if (i == values.end()) {
458 throw Xapian::InvalidArgumentError("Value #" + str(slot) +
459 " is not present in document, in "
460 "Xapian::Document::Internal::remove_value()");
462 values.erase(i);
465 void
466 Xapian::Document::Internal::clear_values()
468 values.clear();
469 values_here = true;
472 void
473 Xapian::Document::Internal::add_posting(const string & tname, Xapian::termpos tpos,
474 Xapian::termcount wdfinc)
476 need_terms();
477 positions_modified = true;
479 map<string, OmDocumentTerm>::iterator i;
480 i = terms.find(tname);
481 if (i == terms.end()) {
482 ++termlist_size;
483 OmDocumentTerm newterm(wdfinc);
484 newterm.append_position(tpos);
485 terms.insert(make_pair(tname, std::move(newterm)));
486 } else {
487 if (i->second.add_position(wdfinc, tpos))
488 ++termlist_size;
492 void
493 Xapian::Document::Internal::add_term(const string & tname, Xapian::termcount wdfinc)
495 need_terms();
497 map<string, OmDocumentTerm>::iterator i;
498 i = terms.find(tname);
499 if (i == terms.end()) {
500 ++termlist_size;
501 OmDocumentTerm newterm(wdfinc);
502 terms.insert(make_pair(tname, std::move(newterm)));
503 } else {
504 if (i->second.increase_wdf(wdfinc))
505 ++termlist_size;
509 void
510 Xapian::Document::Internal::remove_posting(const string & tname,
511 Xapian::termpos tpos,
512 Xapian::termcount wdfdec)
514 need_terms();
516 map<string, OmDocumentTerm>::iterator i;
517 i = terms.find(tname);
518 if (i == terms.end() || i->second.is_deleted()) {
519 if (tname.empty())
520 throw Xapian::InvalidArgumentError("Empty termnames are invalid");
521 throw Xapian::InvalidArgumentError("Term '" + tname +
522 "' is not present in document, in "
523 "Xapian::Document::Internal::remove_posting()");
525 i->second.remove_position(tpos);
526 if (wdfdec) i->second.decrease_wdf(wdfdec);
527 positions_modified = true;
530 Xapian::termpos
531 Xapian::Document::Internal::remove_postings(const string& term,
532 Xapian::termpos termpos_first,
533 Xapian::termpos termpos_last,
534 Xapian::termcount wdf_dec)
536 need_terms();
538 auto i = terms.find(term);
539 if (i == terms.end() || i->second.is_deleted()) {
540 if (term.empty())
541 throw Xapian::InvalidArgumentError("Empty termnames are invalid");
542 throw Xapian::InvalidArgumentError("Term '" + term +
543 "' is not present in document, in "
544 "Xapian::Document::Internal::remove_postings()");
546 auto n_removed = i->second.remove_positions(termpos_first, termpos_last);
547 if (n_removed) {
548 positions_modified = true;
549 if (wdf_dec) {
550 Xapian::termcount wdf_delta;
551 if (mul_overflows(n_removed, wdf_dec, wdf_delta)) {
552 // Decreasing by the maximum value will zero the wdf.
553 wdf_delta = numeric_limits<Xapian::termcount>::max();
555 i->second.decrease_wdf(wdf_delta);
558 return n_removed;
561 void
562 Xapian::Document::Internal::remove_term(const string & tname)
564 need_terms();
565 map<string, OmDocumentTerm>::iterator i;
566 i = terms.find(tname);
567 if (i == terms.end() || i->second.is_deleted()) {
568 if (tname.empty())
569 throw Xapian::InvalidArgumentError("Empty termnames are invalid");
570 throw Xapian::InvalidArgumentError("Term '" + tname +
571 "' is not present in document, in "
572 "Xapian::Document::Internal::remove_term()");
574 --termlist_size;
575 if (!positions_modified) {
576 positions_modified = (i->second.positionlist_count() > 0);
578 i->second.remove();
581 void
582 Xapian::Document::Internal::clear_terms()
584 terms.clear();
585 termlist_size = 0;
586 terms_here = true;
587 // Assume there was a term with positions for now.
588 // FIXME: may be worth checking...
589 positions_modified = true;
592 Xapian::termcount
593 Xapian::Document::Internal::termlist_count() const
595 if (!terms_here) {
596 // How equivalent is this line to the rest?
597 // return database.get() ? database->open_term_list(did)->get_approx_size() : 0;
598 need_terms();
600 Assert(terms_here);
601 return termlist_size;
604 void
605 Xapian::Document::Internal::need_terms() const
607 if (terms_here) return;
608 if (database.get()) {
609 Xapian::TermIterator t(database->open_term_list(did));
610 Xapian::TermIterator tend(NULL);
611 for ( ; t != tend; ++t) {
612 Xapian::PositionIterator p = t.positionlist_begin();
613 OmDocumentTerm term(t.get_wdf());
614 for ( ; p != t.positionlist_end(); ++p) {
615 term.append_position(*p);
617 terms.insert(terms.end(), make_pair(*t, std::move(term)));
620 termlist_size = terms.size();
621 terms_here = true;
624 Xapian::valueno
625 Xapian::Document::Internal::values_count() const
627 LOGCALL(DB, Xapian::valueno, "Document::Internal::values_count", NO_ARGS);
628 need_values();
629 Assert(values_here);
630 RETURN(values.size());
633 string
634 Xapian::Document::Internal::get_description() const
636 string desc = "Document(";
638 // description_append ?
639 if (data_here) {
640 desc += "data='";
641 description_append(desc, data);
642 desc += "'";
645 if (values_here) {
646 if (data_here) desc += ", ";
647 desc += "values[";
648 desc += str(values.size());
649 desc += ']';
652 if (terms_here) {
653 if (data_here || values_here) desc += ", ";
654 desc += "terms[";
655 desc += str(termlist_size);
656 desc += ']';
659 if (database.get()) {
660 if (data_here || values_here || terms_here) desc += ", ";
661 // database->get_description() if/when that returns a non-generic
662 // result.
663 desc += "db:yes";
666 desc += ')';
668 return desc;
671 void
672 Xapian::Document::Internal::need_values() const
674 if (!values_here) {
675 if (database.get()) {
676 Assert(values.empty());
677 do_get_all_values(values);
679 values_here = true;
683 Xapian::Document::Internal::~Internal()
685 if (database.get())
686 database->invalidate_doc_object(this);