2 * @brief IdfFeature class
4 /* Copyright (C) 2012 Parth Gupta
5 * Copyright (C) 2016 Ayush Tomar
6 * Copyright (C) 2019 Vaibhav Kansagara
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License, or (at your option) any later version.
13 * This program is distributed in the hope that it will be useful
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "xapian-letor/feature.h"
26 #include "api/feature_internal.h"
29 #include "stringutils.h"
36 IdfFeature::name() const
41 /** A helper function for feature->get_value()
43 * Checks if the term belongs to the title or is stemmed from the title.
46 is_title_term(const std::string
& term
)
48 return startswith(term
, 'S') || startswith(term
, "ZS");
52 IdfFeature::get_values() const
54 LOGCALL(API
, vector
<double>, "IdfFeature::get_values", NO_ARGS
);
56 vector
<double> values
;
59 Xapian::Query feature_query
= internal
->get_query();
60 for (Xapian::TermIterator qt
= feature_query
.get_unique_terms_begin();
61 qt
!= feature_query
.get_terms_end(); ++qt
) {
62 if (is_title_term((*qt
))) {
63 double idf
= internal
->get_inverse_doc_freq(*qt
);
64 value
+= log10(1 + idf
);
67 values
.push_back(value
);
70 for (Xapian::TermIterator qt
= feature_query
.get_unique_terms_begin();
71 qt
!= feature_query
.get_terms_end(); ++qt
) {
72 if (!is_title_term((*qt
))) {
73 double idf
= internal
->get_inverse_doc_freq(*qt
);
74 value
+= log10(1 + idf
);
77 values
.push_back(value
);
80 for (Xapian::TermIterator qt
= feature_query
.get_unique_terms_begin();
81 qt
!= feature_query
.get_terms_end(); ++qt
) {
82 double idf
= internal
->get_inverse_doc_freq(*qt
);
83 value
+= log10(1 + idf
);
85 values
.push_back(value
);