Merge branch 'fixes' into main/rendor-staging
[ryzomcore.git] / ryzom / tools / translation_tools / extract_new_sheet_names.cpp
blob15004cd5998812b80517f90cd1fad1d9ead36b27
1 // Ryzom - MMORPG Framework <http://dev.ryzom.com/projects/ryzom/>
2 // Copyright (C) 2010 Winch Gate Property Limited
3 //
4 // This source file has been modified by the following contributors:
5 // Copyright (C) 2020 Jan BOON (Kaetemi) <jan.boon@kaetemi.be>
6 //
7 // This program is free software: you can redistribute it and/or modify
8 // it under the terms of the GNU Affero General Public License as
9 // published by the Free Software Foundation, either version 3 of the
10 // License, or (at your option) any later version.
12 // This program is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 // GNU Affero General Public License for more details.
17 // You should have received a copy of the GNU Affero General Public License
18 // along with this program. If not, see <http://www.gnu.org/licenses/>.
20 #include "nel/misc/types_nl.h"
21 #include "nel/misc/config_file.h"
22 #include "nel/misc/sheet_id.h"
23 #include "nel/misc/path.h"
24 #include "nel/misc/diff_tool.h"
25 #include "nel/misc/algo.h"
26 #include "nel/georges/u_form.h"
27 #include "nel/georges/u_form_elm.h"
28 #include "nel/georges/load_form.h"
29 #include "nel/ligo/ligo_config.h"
30 #include "nel/ligo/primitive.h"
31 #include "nel/ligo/primitive_utils.h"
33 using namespace std;
34 using namespace NLMISC;
35 using namespace NLLIGO;
36 using namespace STRING_MANAGER;
39 static CLigoConfig LigoConfig;
40 static bool RemoveOlds = false;
43 // ***************************************************************************
/**
 *	Interface to build the whole list of words (key id) for a specific worksheet
 */
struct IWordListBuilder
{
	/// Virtual destructor so an implementation can be destroyed safely through this interface.
	virtual ~IWordListBuilder() {}

	/**
	 *	Build the list of words (key ids) that the worksheet should contain.
	 *	\param allWords receives the words found by the implementation
	 *	\param workSheetFileName name of the worksheet the list is built for
	 *	\return false if the list could not be built (the caller then leaves the worksheet untouched)
	 */
	virtual bool	buildWordList(std::vector<std::string> &allWords, std::string workSheetFileName) =0;
};
53 // ***************************************************************************
55 * Specialisation of IWordListBuilder to list sheets in a directory
57 struct CSheetWordListBuilder : public IWordListBuilder
59 string SheetExt;
60 string SheetPath;
62 virtual bool buildWordList(std::vector<string> &allWords, string workSheetFileName)
64 SheetExt= toLowerAscii(SheetExt);
66 // verify the directory is correct
67 if(!CFile::isDirectory(SheetPath))
69 nlwarning("Error: Directory '%s' not found. '%s' Aborted", SheetPath.c_str(), workSheetFileName.c_str());
70 return false;
73 // list all files.
74 std::vector<string> allFiles;
75 allFiles.reserve(100000);
76 CPath::getPathContent(SheetPath, true, false, true, allFiles, NULL);
78 // Keep only the extension we want, and remove "_" (parent)
79 allWords.clear();
80 allWords.reserve(allFiles.size());
81 for(uint i=0;i<allFiles.size();i++)
83 string fileNameWithoutExt= CFile::getFilenameWithoutExtension(allFiles[i]);
84 string extension= toLowerAscii(CFile::getExtension(allFiles[i]));
85 // bad extension?
86 if(extension!=SheetExt)
87 continue;
88 // parent?
89 if(fileNameWithoutExt.empty()||fileNameWithoutExt[0]=='_')
90 continue;
91 // ok, add
92 allWords.push_back(toLowerAscii(fileNameWithoutExt));
95 return true;
101 // ***************************************************************************
103 * Specialisation of IWordListBuilder to list new region/place name from .primitive
105 struct CRegionPrimWordListBuilder : public IWordListBuilder
107 string PrimPath;
108 vector<string> PrimFilter;
110 virtual bool buildWordList(std::vector<string> &allWords, string workSheetFileName)
112 // verify the directory is correct
113 if(!CFile::isDirectory(PrimPath))
115 nlwarning("Error: Directory '%s' not found. '%s' Aborted", PrimPath.c_str(), workSheetFileName.c_str());
116 return false;
119 // list all files.
120 std::vector<string> allFiles;
121 allFiles.reserve(100000);
122 CPath::getPathContent(PrimPath, true, false, true, allFiles, NULL);
124 // parse all primitive that match the filter
125 allWords.clear();
126 allWords.reserve(100000);
127 // to avoid duplicate
128 set<string> allWordSet;
129 for(uint i=0;i<allFiles.size();i++)
131 string fileName= CFile::getFilename(allFiles[i]);
132 // filter don't match?
133 bool oneMatch= false;
134 for(uint filter=0;filter<PrimFilter.size();filter++)
136 if(testWildCard(fileName, PrimFilter[filter]))
137 oneMatch= true;
139 if(!oneMatch)
140 continue;
142 // ok, read the file
143 CPrimitives PrimDoc;
144 CPrimitiveContext::instance().CurrentPrimitive = &PrimDoc;
145 if (!loadXmlPrimitiveFile(PrimDoc, allFiles[i], LigoConfig))
147 nlwarning("Error: cannot open file '%s'. '%s' Aborted", allFiles[i].c_str(), workSheetFileName.c_str());
148 CPrimitiveContext::instance().CurrentPrimitive = NULL;
149 return false;
151 CPrimitiveContext::instance().CurrentPrimitive = NULL;
153 // For all primitives of interest
154 const char *listClass[]= {"continent", "region", "place", "stable",
155 "teleport_destination", "room_template"};
156 const char *listProp[]= {"name", "name", "name", "name",
157 "place_name", "place_name"};
158 const uint numListClass= sizeof(listClass)/sizeof(listClass[0]);
159 const uint numListProp= sizeof(listProp)/sizeof(listProp[0]);
160 nlctassert(numListProp==numListClass);
161 for(uint cid=0;cid<numListClass;cid++)
163 // parse the whole hierarchy
164 TPrimitiveClassPredicate predCont(listClass[cid]);
165 CPrimitiveSet<TPrimitiveClassPredicate> setPlace;
166 TPrimitiveSet placeRes;
167 setPlace.buildSet(PrimDoc.RootNode, predCont, placeRes);
168 // for all found
169 for (uint placeId= 0; placeId < placeRes.size(); ++placeId)
171 string primName;
172 if(placeRes[placeId]->getPropertyByName(listProp[cid], primName) && !primName.empty())
174 primName= toLowerAscii(primName);
175 // avoid duplicate
176 if(allWordSet.insert(primName).second)
178 allWords.push_back(primName);
185 return true;
190 // ***************************************************************************
191 void extractNewWords(string workSheetFileName, string columnId, IWordListBuilder &wordListBuilder)
193 uint i;
195 // **** Load the excel sheet
196 // load
197 TWorksheet workSheet;
198 if(!loadExcelSheet(workSheetFileName, workSheet, true))
200 nlwarning("Error reading '%s'. Aborted", workSheetFileName.c_str());
201 return;
203 // get the key column index
204 uint keyColIndex = 0;
206 if(!workSheet.findCol(columnId, keyColIndex))
208 nlwarning("Error: Don't find the column '%s'. '%s' Aborted", columnId.c_str(), workSheetFileName.c_str());
209 return;
211 // get the name column index
212 uint nameColIndex = 0;
213 if(!workSheet.findCol(ucstring("name"), nameColIndex))
215 nlwarning("Error: Don't find the column 'name'. '%s' Aborted", workSheetFileName.c_str());
216 return;
218 // Make a copy of this worksheet, with strlwr on the key
219 // Yoyo: I prefer not modify the original worksheet (don't know what bad side effect it can have....)
220 TWorksheet workSheetLwr= workSheet;
221 for(i=0;i<workSheetLwr.size();i++)
223 ucstring key= workSheetLwr.getData(i, keyColIndex);
224 workSheetLwr.setData(i, keyColIndex, toLower(key));
228 // **** List all words with the builder given
229 std::vector<string> allWords;
230 if(!wordListBuilder.buildWordList(allWords, workSheetFileName))
231 return;
234 // **** Append new one to the worksheet
235 uint nbAdd= 0;
236 for(i=0;i<allWords.size();i++)
238 string keyName= allWords[i];
239 uint rowIdx;
240 // search in the key lowred worksheet (avoid case bugs (they do exist...))
241 if (!workSheetLwr.findRow(keyColIndex, keyName, rowIdx))
243 // we need to add the entry. Add it to the 2 workSheet to maintain coherence (avoid non unique etc...)
244 rowIdx = workSheetLwr.size();
245 // add to the workSheetLwr
246 workSheetLwr.resize(workSheetLwr.size()+1);
247 workSheetLwr.setData(rowIdx, keyColIndex, keyName);
248 workSheetLwr.setData(rowIdx, nameColIndex, string("<GEN>")+keyName);
249 // add to the workSheet
250 workSheet.resize(workSheet.size()+1);
251 workSheet.setData(rowIdx, keyColIndex, keyName);
252 workSheet.setData(rowIdx, nameColIndex, string("<GEN>")+keyName);
254 nbAdd++;
259 // **** Remove no more present ones (and log)
260 uint nbRemove= 0;
261 if(RemoveOlds)
263 // Build as a set
264 std::set<string> allWordSet;
265 for(i=0;i<allWords.size();i++)
266 allWordSet.insert(allWords[i]);
267 // For all rows, append to a copy if not erased
268 TWorksheet tmpCopy, tmpCopyLwr;
269 nlassert(workSheet.ColCount==workSheetLwr.ColCount);
270 nlassert(workSheet.size()==workSheetLwr.size());
271 tmpCopy.setColCount(workSheet.ColCount);
272 tmpCopy.resize(workSheet.size());
273 tmpCopyLwr.setColCount(workSheet.ColCount);
274 tmpCopyLwr.resize(workSheet.size());
275 uint dstRowId=0;
276 for(i=0;i<workSheet.size();i++)
278 string keyStr= workSheetLwr.getData(i, keyColIndex).toString();
279 // if first line, or if the key (lwred) is found in the list of files
280 if(i==0 || allWordSet.find(keyStr)!=allWordSet.end())
282 tmpCopy.Data[dstRowId]= workSheet.Data[i];
283 tmpCopyLwr.Data[dstRowId]= workSheetLwr.Data[i];
284 dstRowId++;
286 else
288 nbRemove++;
289 // log
290 NLMISC::InfoLog->displayRawNL("'%s': '%s' entry erased at line '%d'.", workSheetFileName.c_str(),
291 keyStr.c_str(), i);
294 // resize to correct new size
295 tmpCopy.resize(dstRowId);
296 tmpCopyLwr.resize(dstRowId);
298 // copy back
299 workSheet= tmpCopy;
300 workSheetLwr= tmpCopyLwr;
304 // **** Save
305 if(nbAdd==0 && nbRemove==0)
307 if(RemoveOlds)
308 NLMISC::InfoLog->displayRawNL("'%s': No deprecated entry found.", workSheetFileName.c_str());
309 NLMISC::InfoLog->displayRawNL("'%s': No new entry found.", workSheetFileName.c_str());
310 // Don't save
312 else
314 if(RemoveOlds)
315 NLMISC::InfoLog->displayRawNL("'%s': %d deprecated entry erased.", workSheetFileName.c_str(), nbRemove);
316 NLMISC::InfoLog->displayRawNL("'%s': %d new entry found.", workSheetFileName.c_str(), nbAdd);
317 // Save the not lowered worksheet
318 ucstring s = prepareExcelSheet(workSheet);
321 CI18N::writeTextFile(workSheetFileName.c_str(), s);
323 catch (const Exception &e)
325 nlwarning("cannot save file: '%s'. Reason: %s", workSheetFileName.c_str(), e.what());
331 // ***************************************************************************
332 int extractNewSheetNames(int argc, char *argv[])
334 // **** read the parameters
335 for (int i=2; i<argc; ++i)
337 string s = argv[i];
338 if (s == "-r")
340 // active remove mode
341 RemoveOlds = true;
343 else
345 nlwarning("Unknow option '%s'", argv[i]);
346 return -1;
350 // **** avoid some flood
351 NLMISC::createDebug();
352 NLMISC::DebugLog->addNegativeFilter("numCol changed to");
353 NLMISC::InfoLog->addNegativeFilter("CPath::addSearchPath");
356 // **** read the configuration file
357 CConfigFile cf;
358 cf.load("bin/translation_tools.cfg");
359 CConfigFile::CVar &paths = cf.getVar("Paths");
360 CConfigFile::CVar &pathNoRecurse= cf.getVar("PathsNoRecurse");
361 CConfigFile::CVar &ligoClassFile= cf.getVar("LigoClassFile");
362 CConfigFile::CVar &leveldesignDataPathVar = cf.getVar("LeveldesignDataPath");
364 // parse path
365 for (uint i=0; i<paths.size(); ++i)
367 CPath::addSearchPath(NLMISC::expandEnvironmentVariables(paths.asString(i)), true, false);
369 for (uint i=0; i<pathNoRecurse.size(); ++i)
371 CPath::addSearchPath(NLMISC::expandEnvironmentVariables(pathNoRecurse.asString(i)), false, false);
374 std::string leveldesignDataPath = CPath::standardizePath(NLMISC::expandEnvironmentVariables(leveldesignDataPathVar.asString()));
376 // init ligo config once
377 string ligoPath = CPath::lookup(NLMISC::expandEnvironmentVariables(ligoClassFile.asString()), true, true);
378 LigoConfig.readPrimitiveClass(ligoPath.c_str(), false);
379 NLLIGO::Register();
380 CPrimitiveContext::instance().CurrentLigoConfig = &LigoConfig;
382 // **** Parse all the different type of sheets
383 const char *sheetDefs[]=
385 // 1st is the name of the worksheet file.
386 // 2nd is the Key column identifier.
387 // 3rd is the sheet extension
388 // 4th is the directory where to find new sheets
389 "work/item_words_wk.txt", "item ID", "sitem", "leveldesign/game_element/sitem",
390 "work/creature_words_wk.txt", "creature ID", "creature", "leveldesign/game_elem/creature/fauna", // take fauna only because other are special
391 "work/sbrick_words_wk.txt", "sbrick ID", "sbrick", "leveldesign/game_element/sbrick",
392 "work/sphrase_words_wk.txt", "sphrase ID", "sphrase", "leveldesign/game_element/sphrase",
394 uint numSheetDefs= sizeof(sheetDefs) / (4*sizeof(sheetDefs[0]));
396 // For all different type of sheet
397 for(uint i=0;i<numSheetDefs;i++)
399 CSheetWordListBuilder builder;
400 builder.SheetExt= sheetDefs[i*4+2];
401 builder.SheetPath= leveldesignDataPath + sheetDefs[i*4+3];
402 extractNewWords(sheetDefs[i*4+0], sheetDefs[i*4+1], builder);
405 // **** Parse place and region names
407 // build place names
408 CRegionPrimWordListBuilder builder;
409 builder.PrimPath= leveldesignDataPath + "primitives";
410 builder.PrimFilter.push_back("region_*.primitive");
411 builder.PrimFilter.push_back("indoors_*.primitive");
412 extractNewWords("work/place_words_wk.txt", "placeId", builder);
415 return 0;