[Tcl] Fix probe for TCL_LIB
[xapian.git] / xapian-letor / feature / colltfcolllenfeature.cc
blob4639c52f8da91c9c327b5a18aff0e7e8d4e2a5b8
/** @file
 * @brief CollTfCollLenFeature class
 */
/* Copyright (C) 2012 Parth Gupta
 * Copyright (C) 2016 Ayush Tomar
 * Copyright (C) 2019 Vaibhav Kansagara
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
23 #include <config.h>
25 #include "xapian-letor/feature.h"
26 #include "api/feature_internal.h"
28 #include "debuglog.h"
29 #include "stringutils.h"
31 using namespace std;
33 namespace Xapian {
35 string
36 CollTfCollLenFeature::name() const
38 return "CollTfCollLenFeature";
41 /** A helper function for feature->get_value()
43 * Checks if the term belongs to the title or is stemmed from the title.
45 static inline bool
46 is_title_term(const std::string& term)
48 return startswith(term, 'S') || startswith(term, "ZS");
51 vector<double>
52 CollTfCollLenFeature::get_values() const
54 LOGCALL(API, vector<double>, "CollTfCollLenFeature::get_values", NO_ARGS);
56 vector<double> values;
57 double value = 0;
58 double coll_len = internal->get_collection_length("title");
60 Xapian::Query feature_query = internal->get_query();
61 for (Xapian::TermIterator qt = feature_query.get_unique_terms_begin();
62 qt != feature_query.get_terms_end(); ++qt) {
63 if (is_title_term((*qt))) {
64 double tf = internal->get_collection_termfreq(*qt);
65 value += log10(1 + (coll_len / (1 + tf)));
68 values.push_back(value);
69 value = 0;
70 coll_len = internal->get_collection_length("body");
72 for (Xapian::TermIterator qt = feature_query.get_unique_terms_begin();
73 qt != feature_query.get_terms_end(); ++qt) {
74 if (!is_title_term((*qt))) {
75 double tf = internal->get_collection_termfreq(*qt);
76 value += log10(1 + (coll_len / (1 + tf)));
79 values.push_back(value);
80 value = 0;
81 coll_len = internal->get_collection_length("whole");
83 for (Xapian::TermIterator qt = feature_query.get_unique_terms_begin();
84 qt != feature_query.get_terms_end(); ++qt) {
85 double tf = internal->get_collection_termfreq(*qt);
86 value += log10(1 + (coll_len / (1 + tf)));
88 values.push_back(value);
90 return values;