// NeL - MMORPG Framework <http://dev.ryzom.com/projects/nel/>
// Copyright (C) 2010 Winch Gate Property Limited
//
// This source file has been modified by the following contributors:
// Copyright (C) 2015 Jan BOON (Kaetemi) <jan.boon@kaetemi.be>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
22 #include "nel/misc/words_dictionary.h"
23 #include "nel/misc/config_file.h"
24 #include "nel/misc/path.h"
25 #include "nel/misc/diff_tool.h"
33 const string DefaultColTitle
= "name";
37 NL_INSTANCE_COUNTER_IMPL(CWordsDictionary
);
// Default constructor of the dictionary.
// NOTE(review): no body is present in this extract (the embedded original line numbers jump
// from 42 to 47), so what the constructor does cannot be read from here - confirm upstream.
42 CWordsDictionary::CWordsDictionary()
/* Load the config file and the related words files. Return false in case of failure.
 * Config file variables:
 * - WordsPath: where to find <filter>_words_<languageCode>.txt
 * - LanguageCode: language code (ex: en for English)
 * - Utf8: results are in UTF8, otherwise in ANSI string
 * - Filter: "*" for all files (default) or a name (ex: "item").
 * - AdditionalFiles/AdditionalFileColumnTitles
 */
// Loads the configuration file named by configFileName, reads the dictionary settings from it
// (WordsPath, LanguageCode, Utf8, Filter, AdditionalFiles, AdditionalFileColumnTitles), then
// lists the content of wordsPath and appends every matching sheet to _FileList, _Keys and _Words.
// NOTE(review): this extract is lossy. The number at the start of a statement is its line number
// in the original file, and the gaps in that numbering show that lines (braces, guards, returns,
// some declarations) are missing, so the exact control flow cannot be read from here.
// Confirm every note below against the upstream file.
55 bool CWordsDictionary::init( const string
& configFileName
)
// Load the configuration; an EConfigFile failure is reported through nlwarning.
62 cf
.load( configFileName
);
65 catch (const EConfigFile
&e
)
67 nlwarning( "WD: %s", e
.what() );
// Settings and their initial values: filter starts as "*", filterAll as true, utf8 as false.
69 string wordsPath
, languageCode
, filter
= "*";
70 vector
<string
> additionalFiles
, additionalFileColumnTitles
;
71 bool filterAll
= true, utf8
= false;
// Each setting is fetched with cf.getVarPtr() and converted with asString()/asInt().
74 CConfigFile::CVar
*v
= cf
.getVarPtr( "WordsPath" );
77 wordsPath
= v
->asString();
78 /*if ( (!wordsPath.empty()) && (wordsPath[wordsPath.size()-1]!='/') )
81 v
= cf
.getVarPtr( "LanguageCode" );
83 languageCode
= v
->asString();
84 v
= cf
.getVarPtr( "Utf8" );
// utf8 is enabled only when the variable's integer value is exactly 1.
86 utf8
= (v
->asInt() == 1);
87 v
= cf
.getVarPtr( "Filter" );
90 filter
= v
->asString();
// filterAll records whether the configured filter is the wildcard "*".
91 filterAll
= (filter
== "*");
93 v
= cf
.getVarPtr( "AdditionalFiles" );
// Copy every entry of AdditionalFiles.
96 for ( uint i
=0; i
!=v
->size(); ++i
)
97 additionalFiles
.push_back( v
->asString( i
) );
// NOTE(review): v is dereferenced right after this lookup and no null check is visible
// (original lines 98 and 99 are consecutive). Confirm what getVarPtr returns when
// AdditionalFileColumnTitles is absent from the config file.
98 v
= cf
.getVarPtr( "AdditionalFileColumnTitles" );
99 if ( v
->size() != additionalFiles
.size() )
101 nlwarning( "AdditionalFiles and AdditionalFileColumnTitles have different size, ignoring second one" );
// NOTE(review): the resize uses v->size(), not additionalFiles.size(), although the titles are
// later indexed with iAdditionalFile, which iterates over additionalFiles. Looks like this can
// read past the end when fewer titles than files are configured - confirm.
102 additionalFileColumnTitles
.resize( v
->size(), DefaultColTitle
);
// Copy every entry of AdditionalFileColumnTitles.
106 for ( uint i
=0; i
!=v
->size(); ++i
)
107 additionalFileColumnTitles
.push_back( v
->asString( i
) );
// NOTE(review): the statement guarded by this empty-check (original line 113) is missing
// from the extract.
112 if ( languageCode
.empty() )
115 // Load all found words files
116 const string ext
= ".txt";
117 vector
<string
> fileList
;
// Fill fileList with the content of wordsPath, then examine each entry.
118 CPath::getPathContent( wordsPath
, false, false, true, fileList
);
119 for ( vector
<string
>::const_iterator ifl
=fileList
.begin(); ifl
!=fileList
.end(); ++ifl
)
121 const string
& filename
= (*ifl
);
// p receives the position of the additional-file name, or of the words pattern, inside filename.
122 string::size_type p
= string::npos
;
123 bool isAdditionalFile
= false;
125 // Test if filename is in additional file list
126 uint iAdditionalFile
;
127 for ( iAdditionalFile
=0; iAdditionalFile
!=additionalFiles
.size(); ++iAdditionalFile
)
129 if ( (p
= filename
.find( additionalFiles
[iAdditionalFile
] )) != string::npos
)
131 isAdditionalFile
= true;
136 // Or test if filename is a words_*.txt file
// The pattern is "_words_" followed by the language code and the ".txt" extension.
137 string pattern
= string("_words_") + languageCode
+ ext
;
138 if ( isAdditionalFile
||
139 ((p
= filename
.find( pattern
)) != string::npos
) )
141 // Skip if a filter is specified and does not match the current file
142 if ( (!filterAll
) && (filename
.find( filter
+pattern
) == string::npos
) )
146 nldebug( "WD: Loading %s", filename
.c_str() );
147 _FileList
.push_back( filename
);
// origSize is the length of filename without ext. The difference with truncFilename.size()
// is what getFilenameWithoutExtension removed in front (presumably the directory part -
// confirm), so the substr keeps the part of truncFilename that precedes the pattern.
148 string::size_type origSize
= filename
.size() - ext
.size();
149 const string truncFilename
= CFile::getFilenameWithoutExtension( filename
);
// wordType is left empty for additional files.
150 const string wordType
= isAdditionalFile
? "" : truncFilename
.substr( 0, p
- (origSize
- truncFilename
.size()) );
// Additional files use their configured column title, other files use DefaultColTitle.
151 const string colTitle
= isAdditionalFile
? additionalFileColumnTitles
[iAdditionalFile
] : DefaultColTitle
;
153 // Load Unicode Excel words file
154 STRING_MANAGER::TWorksheet worksheet
;
155 STRING_MANAGER::loadExcelSheet( filename
, worksheet
);
// ck and cw are filled by findId() and findCol().
// NOTE(review): their declaration (original line 156) is missing from the extract.
157 if ( worksheet
.findId( ck
) && worksheet
.findCol( ucstring(colTitle
), cw
) ) // =>
159 for ( std::vector
<STRING_MANAGER::TWorksheet::TRow
>::iterator ip
= worksheet
.begin(); ip
!=worksheet
.end(); ++ip
)
161 if ( ip
== worksheet
.begin() ) // skip first row
// One key and one word are appended per row, so _Keys and _Words stay index-aligned.
// The word is converted with toUtf8() when utf8 is set, otherwise with toString().
163 STRING_MANAGER::TWorksheet::TRow
& row
= *ip
;
164 _Keys
.push_back( row
[ck
].toString() );
165 string word
= utf8
? row
[cw
].toUtf8() : row
[cw
].toString();
166 _Words
.push_back( word
);
// Reported when the ID column or the title column could not be located in the sheet.
170 nlwarning( "WD: %s ID or %s not found in %s", wordType
.c_str(), colTitle
.c_str(), filename
.c_str() );
// Final diagnostics.
// NOTE(review): the condition that leads here and the return statements are missing from the
// extract.
176 if ( wordsPath
.empty() )
177 nlwarning( "WD: WordsPath missing in config file %s", configFileName
.c_str() );
178 nlwarning( "WD: %s_words_%s.txt not found", filter
.c_str(), languageCode
.c_str() );
187 * Set the result vector with strings corresponding to the input string:
188 * - If inputStr is partially or completely found in the keys, all the matching <key,words> are returned;
189 * - If inputStr is partially or completely in the words, all the matching <key, words> are returned.
190 * The following tags can modify the behaviour of the search algorithm:
191 * - ^mystring returns mystring only if it is at the beginning of a key or word
192 * - mystring$ returns mystring only if it is at the end of a key or word
193 * All returned words are in UTF8.
195 void CWordsDictionary::lookup( const CSString
& inputStr
, CVectorSString
& resultVec
) const
197 // Prepare search string
198 if ( inputStr
.empty() )
201 CSString searchStr
= inputStr
;
202 bool findAtBeginning
= false, findAtEnd
= false;
203 if ( searchStr
[0] == '^' )
205 searchStr
= searchStr
.substr( 1 );
206 findAtBeginning
= true;
208 if ( searchStr
[searchStr
.size()-1] == '$' )
210 searchStr
= searchStr
.rightCrop( 1 );
215 for ( CVectorSString::const_iterator ivs
=_Keys
.begin(); ivs
!=_Keys
.end(); ++ivs
)
217 const CSString
& key
= *ivs
;
219 if ( (p
= key
.findNS( searchStr
.c_str() )) != string::npos
)
221 if ( ((!findAtBeginning
) || (p
==0)) && ((!findAtEnd
) || (p
==key
.size()-searchStr
.size())) )
222 resultVec
.push_back( makeResult( key
, _Words
[ivs
-_Keys
.begin()] ) );
225 for ( CVectorSString::const_iterator ivs
=_Words
.begin(); ivs
!=_Words
.end(); ++ivs
)
227 const CSString
& word
= *ivs
;
229 if ( (p
= word
.findNS( searchStr
.c_str() )) != string::npos
)
231 if ( ((!findAtBeginning
) || (p
==0)) && ((!findAtEnd
) || (p
==word
.size()-searchStr
.size())) )
232 resultVec
.push_back( makeResult( _Keys
[ivs
-_Words
.begin()], word
) );
239 * Set the result vector with the word(s) corresponding to the key
241 void CWordsDictionary::exactLookupByKey( const CSString
& key
, CVectorSString
& resultVec
)
244 for ( CVectorSString::const_iterator ivs
=_Keys
.begin(); ivs
!=_Keys
.end(); ++ivs
)
247 resultVec
.push_back( _Words
[ivs
-_Keys
.begin()] );
253 * Make a result string
255 inline CSString
CWordsDictionary::makeResult( const CSString
&key
, const CSString
&word
)
257 return key
+ ": " + word
.c_str();
262 * Return the key contained in the provided string returned by lookup() (without extension)
264 CSString
CWordsDictionary::getWordsKey( const CSString
& resultStr
)
266 return resultStr
.splitTo( ':' );