Merge branch 'fixes' into main/rendor-staging
[ryzomcore.git] / nel / src / misc / words_dictionary.cpp
blob5478f1f437e8f57b905fe1dcbd1329c7382aa002
1 // NeL - MMORPG Framework <http://dev.ryzom.com/projects/nel/>
2 // Copyright (C) 2010 Winch Gate Property Limited
3 //
4 // This source file has been modified by the following contributors:
5 // Copyright (C) 2015 Jan BOON (Kaetemi) <jan.boon@kaetemi.be>
6 //
7 // This program is free software: you can redistribute it and/or modify
8 // it under the terms of the GNU Affero General Public License as
9 // published by the Free Software Foundation, either version 3 of the
10 // License, or (at your option) any later version.
12 // This program is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 // GNU Affero General Public License for more details.
17 // You should have received a copy of the GNU Affero General Public License
18 // along with this program. If not, see <http://www.gnu.org/licenses/>.
20 #include "stdmisc.h"
22 #include "nel/misc/words_dictionary.h"
23 #include "nel/misc/config_file.h"
24 #include "nel/misc/path.h"
25 #include "nel/misc/diff_tool.h"
27 using namespace std;
29 #ifdef DEBUG_NEW
30 #define new DEBUG_NEW
31 #endif
33 const string DefaultColTitle = "name";
35 namespace NLMISC {
37 NL_INSTANCE_COUNTER_IMPL(CWordsDictionary);
40 * Constructor
42 CWordsDictionary::CWordsDictionary()
47 /* Load the config file and the related words files. Return false in case of failure.
48 * Config file variables:
49 * - WordsPath: where to find <filter>_words_<languageCode>.txt
50 * - LanguageCode: language code (ex: en for English)
51 * - Utf8: results are in UTF8, otherwise in ANSI string
52 * - Filter: "*" for all files (default) or a name (ex: "item").
53 * - AdditionalFiles/AdditionalFileColumnTitles
55 bool CWordsDictionary::init( const string& configFileName )
57 // Read config file
58 bool cfFound = false;
59 CConfigFile cf;
60 try
62 cf.load( configFileName );
63 cfFound = true;
65 catch (const EConfigFile &e)
67 nlwarning( "WD: %s", e.what() );
69 string wordsPath, languageCode, filter = "*";
70 vector<string> additionalFiles, additionalFileColumnTitles;
71 bool filterAll = true, utf8 = false;
72 if ( cfFound )
74 CConfigFile::CVar *v = cf.getVarPtr( "WordsPath" );
75 if ( v )
77 wordsPath = v->asString();
78 /*if ( (!wordsPath.empty()) && (wordsPath[wordsPath.size()-1]!='/') )
79 wordsPath += '/';*/
81 v = cf.getVarPtr( "LanguageCode" );
82 if ( v )
83 languageCode = v->asString();
84 v = cf.getVarPtr( "Utf8" );
85 if ( v )
86 utf8 = (v->asInt() == 1);
87 v = cf.getVarPtr( "Filter" );
88 if ( v )
90 filter = v->asString();
91 filterAll = (filter == "*");
93 v = cf.getVarPtr( "AdditionalFiles" );
94 if ( v )
96 for ( uint i=0; i!=v->size(); ++i )
97 additionalFiles.push_back( v->asString( i ) );
98 v = cf.getVarPtr( "AdditionalFileColumnTitles" );
99 if ( v->size() != additionalFiles.size() )
101 nlwarning( "AdditionalFiles and AdditionalFileColumnTitles have different size, ignoring second one" );
102 additionalFileColumnTitles.resize( v->size(), DefaultColTitle );
104 else
106 for ( uint i=0; i!=v->size(); ++i )
107 additionalFileColumnTitles.push_back( v->asString( i ) );
112 if ( languageCode.empty() )
113 languageCode = "en";
115 // Load all found words files
116 const string ext = ".txt";
117 vector<string> fileList;
118 CPath::getPathContent( wordsPath, false, false, true, fileList );
119 for ( vector<string>::const_iterator ifl=fileList.begin(); ifl!=fileList.end(); ++ifl )
121 const string& filename = (*ifl);
122 string::size_type p = string::npos;
123 bool isAdditionalFile = false;
125 // Test if filename is in additional file list
126 uint iAdditionalFile;
127 for ( iAdditionalFile=0; iAdditionalFile!=additionalFiles.size(); ++iAdditionalFile )
129 if ( (p = filename.find( additionalFiles[iAdditionalFile] )) != string::npos )
131 isAdditionalFile = true;
132 break;
136 // Or test if filename is a words_*.txt file
137 string pattern = string("_words_") + languageCode + ext;
138 if ( isAdditionalFile ||
139 ((p = filename.find( pattern )) != string::npos) )
141 // Skip if a filter is specified and does not match the current file
142 if ( (!filterAll) && (filename.find( filter+pattern ) == string::npos) )
143 continue;
145 // Load file
146 nldebug( "WD: Loading %s", filename.c_str() );
147 _FileList.push_back( filename );
148 string::size_type origSize = filename.size() - ext.size();
149 const string truncFilename = CFile::getFilenameWithoutExtension( filename );
150 const string wordType = isAdditionalFile ? "" : truncFilename.substr( 0, p - (origSize - truncFilename.size()) );
151 const string colTitle = isAdditionalFile ? additionalFileColumnTitles[iAdditionalFile] : DefaultColTitle;
153 // Load Unicode Excel words file
154 STRING_MANAGER::TWorksheet worksheet;
155 STRING_MANAGER::loadExcelSheet( filename, worksheet );
156 uint ck, cw = 0;
157 if ( worksheet.findId( ck ) && worksheet.findCol( ucstring(colTitle), cw ) ) // =>
159 for ( std::vector<STRING_MANAGER::TWorksheet::TRow>::iterator ip = worksheet.begin(); ip!=worksheet.end(); ++ip )
161 if ( ip == worksheet.begin() ) // skip first row
162 continue;
163 STRING_MANAGER::TWorksheet::TRow& row = *ip;
164 _Keys.push_back( row[ck].toString() );
165 string word = utf8 ? row[cw].toUtf8() : row[cw].toString();
166 _Words.push_back( word );
169 else
170 nlwarning( "WD: %s ID or %s not found in %s", wordType.c_str(), colTitle.c_str(), filename.c_str() );
174 if ( _Keys.empty() )
176 if ( wordsPath.empty() )
177 nlwarning( "WD: WordsPath missing in config file %s", configFileName.c_str() );
178 nlwarning( "WD: %s_words_%s.txt not found", filter.c_str(), languageCode.c_str() );
179 return false;
181 else
182 return true;
187 * Set the result vector with strings corresponding to the input string:
188 * - If inputStr is partially or completely found in the keys, all the matching <key,words> are returned;
189 * - If inputStr is partially or completely in the words, all the matching <key, words> are returned.
190 * The following tags can modify the behaviour of the search algorithm:
191 * - ^mystring returns mystring only if it is at the beginning of a key or word
192 * - mystring$ returns mystring only if it is at the end of a key or word
193 * All returned words are in UTF8.
195 void CWordsDictionary::lookup( const CSString& inputStr, CVectorSString& resultVec ) const
197 // Prepare search string
198 if ( inputStr.empty() )
199 return;
201 CSString searchStr = inputStr;
202 bool findAtBeginning = false, findAtEnd = false;
203 if ( searchStr[0] == '^' )
205 searchStr = searchStr.substr( 1 );
206 findAtBeginning = true;
208 if ( searchStr[searchStr.size()-1] == '$' )
210 searchStr = searchStr.rightCrop( 1 );
211 findAtEnd = true;
214 // Search
215 for ( CVectorSString::const_iterator ivs=_Keys.begin(); ivs!=_Keys.end(); ++ivs )
217 const CSString& key = *ivs;
218 string::size_type p;
219 if ( (p = key.findNS( searchStr.c_str() )) != string::npos )
221 if ( ((!findAtBeginning) || (p==0)) && ((!findAtEnd) || (p==key.size()-searchStr.size())) )
222 resultVec.push_back( makeResult( key, _Words[ivs-_Keys.begin()] ) );
225 for ( CVectorSString::const_iterator ivs=_Words.begin(); ivs!=_Words.end(); ++ivs )
227 const CSString& word = *ivs;
228 string::size_type p;
229 if ( (p = word.findNS( searchStr.c_str() )) != string::npos )
231 if ( ((!findAtBeginning) || (p==0)) && ((!findAtEnd) || (p==word.size()-searchStr.size())) )
232 resultVec.push_back( makeResult( _Keys[ivs-_Words.begin()], word ) );
239 * Set the result vector with the word(s) corresponding to the key
241 void CWordsDictionary::exactLookupByKey( const CSString& key, CVectorSString& resultVec )
243 // Search
244 for ( CVectorSString::const_iterator ivs=_Keys.begin(); ivs!=_Keys.end(); ++ivs )
246 if ( key == *ivs )
247 resultVec.push_back( _Words[ivs-_Keys.begin()] );
253 * Make a result string
255 inline CSString CWordsDictionary::makeResult( const CSString &key, const CSString &word )
257 return key + ": " + word.c_str();
262 * Return the key contained in the provided string returned by lookup() (without extension)
264 CSString CWordsDictionary::getWordsKey( const CSString& resultStr )
266 return resultStr.splitTo( ':' );
269 } // NLMISC