Add README
[xapian-trec.git] / config_file.cc
blob6078b5c74855c9cb20e167d72bafcc23df273463
1 /* config_file.cc: configuration load for trec experiments
3 * ----START-LICENCE----
4 * Copyright 2003 Andy MacFarlane, City University
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2 of the
9 * License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
19 * USA
20 * -----END-LICENCE-----
21 */
23 #include <iostream>
24 #include <fstream>
25 #include <algorithm>
26 #include <string>
27 #include <xapian.h>
28 #include "config_file.h"
29 #include "split.h"
31 using namespace Xapian;
32 using namespace std;
34 void CONFIG_TREC::record_tag( string config_tag, string config_value ) {
36 int found=0;
38 if( config_tag == "textfile" ) {
39 textfile = config_value;
40 found = 1;
41 } // END if
42 if( config_tag == "stopsfile" ) {
43 stopsfile = config_value;
44 found = 1;
45 } // END if
46 if( config_tag == "language" ) {
47 language = config_value;
48 found = 1;
49 } // END if
50 if( config_tag == "db" ) {
51 db = config_value;
52 found = 1;
53 } // END if
54 if( config_tag == "querytype" ) {
55 querytype = config_value;
56 found = 1;
57 } // END if
58 if( config_tag == "queryfile" ) {
59 queryfile = config_value;
60 found = 1;
61 } // END if
62 if( config_tag == "resultsfile" ) {
63 resultsfile = config_value;
64 found = 1;
65 } // END if
66 if( config_tag == "transfile" ) {
67 transfile = config_value;
68 found = 1;
69 } // END if
70 if( config_tag == "noresults" ) {
71 noresults = atoi(config_value.c_str());
72 found = 1;
73 } // END if
74 if( config_tag == "const_k1" ) {
75 const_k1 = atof(config_value.c_str());
76 found = 1;
77 } // END if
78 if( config_tag == "const_b" ) {
79 const_b = atof(config_value.c_str());
80 found = 1;
81 } // END if
82 if( config_tag == "topicfile" ) {
83 topicfile = config_value;
84 found = 1;
85 } // END if
86 if( config_tag == "topicfields" ) {
87 topicfields = config_value;
88 found = 1;
89 } // END if
90 if( config_tag == "relfile" ) {
91 relfile = config_value;
92 found = 1;
93 } // END if
94 if( config_tag == "runname" ) {
95 runname = config_value;
96 found = 1;
97 } // END if
98 if( config_tag == "nterms" ) {
99 nterms = atoi(config_value.c_str());
100 found = 1;
101 } // END if
103 if( !found ) {
104 cout << "ERROR: could not locate tag [" << config_tag << "] for value [" << config_value
105 << "]" << endl;
106 } // END if
108 } // END record_tag
110 void CONFIG_TREC::setup_config( string filename ) {
112 // set defaults
113 textfile = "noneassigned"; // must enter a file/dir for text
114 language = "english"; // corpus language
115 db = "noneassigned"; // must enter path of database
116 querytype = "n"; // type of query: default is n=normal
117 queryfile = "noneassigned"; // must enter path/filename of query file
118 resultsfile = "trec.log"; // path/filename of results file
119 transfile = "transaction.log"; // transaction log file (timings etc)
120 noresults = 1000; // no of results to save in results log file
121 const_k1 = 1.2; // value for K1 constant (BM25)
122 const_b = 0.75; // value for B constant (BM25)
123 topicfile = "noneassigned"; // path/filename of topic file
124 topicfields = "t"; // fields of topic to use from topic file: default title
125 relfile= "noneassigned"; // path/filename of relevance judgements file
126 runname = "xapiantrec"; // name of the run
127 nterms = 100; // no of terms to pick from the topic
129 std::ifstream configfile( filename.c_str() );
131 if( !configfile ) {
132 cerr << "ERROR: you must specify a valid configuration file name" << endl;
133 exit(0);
134 } //else cout << "CONFIG) Opened configuration file: " << filename << endl;
136 while( !configfile.eof() ) {
138 // read in lines from the configuration file
139 string data;
140 // the tag
141 string config_tag;
142 // get the value
143 string config_value;
145 // identify and save information from the configuration file
146 if( !configfile.eof() ) {
147 configfile >> data;
148 config_tag = data;
149 configfile >> data;
150 config_value = data;
151 //cout << "GOT) values [" << config_tag << "] and [" << config_value << "]" << endl;
153 // record the tag
154 if( !configfile.eof() ) record_tag( config_tag, config_value );
155 } // END if
157 } // END while
159 } // END setup_config
161 int CONFIG_TREC::check_query_config() {
162 // ensure that all the information required by query generator has been entered in config file
164 if( queryfile == "noneassigned" ) {
165 cerr << "ERROR: you must specify the query file" << endl;
166 return 0;
167 } // END if
168 if( stopsfile == "noneassigned" ) {
169 cerr << "ERROR: you must specify the stop word file" << endl;
170 return 0;
171 } // END if
172 if( topicfile == "noneassigned" ) {
173 cerr << "ERROR: you must specify the topic file" << endl;
174 return 0;
175 } // END if
176 if( db == "noneassigned" ) {
177 cerr << "ERROR: you must specify the db path" << endl;
178 return 0;
179 } // END if
181 return 1;
183 } // END check_query_config
185 int CONFIG_TREC::check_index_config( ) {
186 // ensure that all the information required by indexer has been entered in config file
188 if( stopsfile == "noneassigned" ) {
189 cerr << "ERROR: you must specify the stops file" << endl;
190 return 0;
191 } // END if
193 if( db == "noneassigned" ) {
194 cerr << "ERROR: you must specify the db path" << endl;
195 return 0;
196 } // END if
198 if( textfile == "noneassigned" ) {
199 cerr << "ERROR: you must specify the db path" << endl;
200 return 0;
201 } // END if
203 return 1;
205 } // END check_index_config
207 int CONFIG_TREC::check_search_config( ) {
208 // ensure that all the information required by search program has been entered in config file
210 if( queryfile == "noneassigned" ) {
211 cerr << "ERROR: you must specify the " << endl;
212 return 0;
213 } // END if
214 if( stopsfile == "noneassigned" ) {
215 cerr << "ERROR: you must specify the " << endl;
216 return 0;
217 } // END if
219 return 1;
221 } // END check_search_config