1 // NeL - MMORPG Framework <http://dev.ryzom.com/projects/nel/>
2 // Copyright (C) 2010 Winch Gate Property Limited
4 // This source file has been modified by the following contributors:
5 // Copyright (C) 2020 Jan BOON (Kaetemi) <jan.boon@kaetemi.be>
7 // This program is free software: you can redistribute it and/or modify
8 // it under the terms of the GNU Affero General Public License as
9 // published by the Free Software Foundation, either version 3 of the
10 // License, or (at your option) any later version.
12 // This program is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 // GNU Affero General Public License for more details.
17 // You should have received a copy of the GNU Affero General Public License
18 // along with this program. If not, see <http://www.gnu.org/licenses/>.
22 #include "nel/misc/diff_tool.h"
23 #include "nel/misc/path.h"
25 using namespace NLMISC
;
32 namespace STRING_MANAGER
35 uint64
makePhraseHash(const TPhrase
&phrase
)
38 text
= phrase
.Parameters
;
39 for (uint i
=0; i
<phrase
.Clauses
.size(); ++i
)
41 text
+= phrase
.Clauses
[i
].Conditions
;
42 text
+= phrase
.Clauses
[i
].Identifier
;
43 text
+= phrase
.Clauses
[i
].Text
;
46 return CI18N::makeHash(text
);
52 bool parseHashFromComment(const ucstring
&comments
, uint64
&hashValue
)
54 string str
= comments
.toString();
56 string::size_type pos
= str
.find("HASH_VALUE ");
57 if (pos
== string::npos
)
60 string hashStr
= str
.substr(pos
+ 11, 16);
62 hashValue
= CI18N::stringToHash(hashStr
);
// Helper used by the parsers in this file to obtain the line number that is
// printed in their warning messages: callers pass the text being parsed and
// the current parse position.
// The visible code walks an iterator from text.begin() up to (excluding) upTo.
// NOTE(review): the loop body and the return statement are not visible in this
// excerpt; presumably the function counts the line breaks found before upTo --
// confirm against the complete file.
67 uint32
countLine(const ucstring
&text
, const ucstring::const_iterator upTo
)
70 ucstring::const_iterator
first(text
.begin());
72 for (; first
!= upTo
; ++first
)
// Load a string file and parse it into TStringInfo entries appended to stringInfos.
// The file content is read with CI18N::readTextFile and scanned entry by entry:
//  - whitespace and comments are skipped, the comments being collected in si.Comments,
//  - "#fileline" lines are skipped,
//  - a label is parsed into si.Identifier,
//  - a string delimited by openMark / closeMark is parsed into si.Text,
//  - a second marked string is parsed into si.Text2 (the guarding condition is not
//    visible in this excerpt; presumably specialCase -- confirm).
// si.HashValue is read from a HASH_VALUE comment when one is present; it is computed
// with CI18N::makeHash(si.Text) when forceRehash is set or no hash comment is found.
// Parse errors are reported through nlwarning with a line number obtained from countLine.
// A final pass warns about every identifier that appears more than once.
// NOTE(review): the return statements are not visible in this excerpt; confirm the
// return contract against the complete file.
81 bool loadStringFile(const std::string filename
, vector
<TStringInfo
> &stringInfos
, bool forceRehash
, ucchar openMark
, ucchar closeMark
, bool specialCase
)
// NOTE(review): the statements up to the readTextFile call use names (fp, buffer, size,
// pos) whose declarations are not visible and a block comment is opened among them;
// they look like remnants of disabled file-reading variants -- confirm.
89 size = fp.getFileSize();
90 buffer = new uint8[size];
91 fp.serialBuffer(buffer, size);
93 catch(const Exception &e)
95 nlinfo("Can't open file [%s] (%s)\n", filename.c_str(), e.what());
99 /* FILE *fp = nlfopen(filename, "rb");
103 nlinfo("Can't open file [%s]\n", filename.c_str());
109 // move to end of file
110 fseek(fp, 0, SEEK_END);
115 uint8 *buffer = new uint8[uint(pos)];
118 uint size = fread(buffer, 1, uint(pos), fp);
123 CI18N::readTextFile(filename
, text
, false, true, CI18N::LINE_FMT_LF
);
124 // CI18N::readTextBuffer(buffer, size, text);
127 // ok, parse the file now.
128 ucstring::const_iterator
first(text
.begin()), last(text
.end());
// lastLabel remembers the last identifier parsed successfully; it is quoted in the
// "Invalid label after" warning.
129 std::string
lastLabel("nothing");
131 while (first
!= last
)
134 CI18N::skipWhiteSpace(first
, last
, &si
.Comments
);
138 // check if there is only swap command remaining in comment
139 if (si
.Comments
.find(ucstring("// DIFF SWAP ")) != ucstring::npos
)
141 stringInfos
.push_back(si
);
146 // try to read a #fileline preprocessor command
147 if (CI18N::matchToken("#fileline", first
, last
))
149 // for now, just skip
150 uint32 lineCounter
=0; // we count line another way
151 CI18N::skipLine(first
, last
, lineCounter
);
153 // begin parse of next line
157 if (!CI18N::parseLabel(first
, last
, si
.Identifier
))
159 uint32 line
= countLine(text
, first
);
160 nlwarning("DT: Fatal : In '%s', line %u: Invalid label after '%s'",
166 lastLabel
= si
.Identifier
;
168 CI18N::skipWhiteSpace(first
, last
, &si
.Comments
);
170 if (!CI18N::parseMarkedString(openMark
, closeMark
, first
, last
, si
.Text
))
172 uint32 line
= countLine(text
, first
);
173 nlwarning("DT: Fatal : In '%s', line %u: Invalid text value for label %s",
182 CI18N::skipWhiteSpace(first
, last
, &si
.Comments
);
184 if (!CI18N::parseMarkedString(openMark
, closeMark
, first
, last
, si
.Text2
))
186 uint32 line
= countLine(text
, first
);
187 nlwarning("DT: Fatal: In '%s' line %u: Invalid text2 value label %s",
// forceRehash short-circuits the comment lookup, so a stored hash is ignored and
// the hash is always recomputed from si.Text in that case.
196 if (forceRehash
|| !parseHashFromComment(si
.Comments
, si
.HashValue
))
198 // compute the hash value from text.
199 si
.HashValue
= CI18N::makeHash(si
.Text
);
200 // nldebug("Generating hash for %s as %s", si.Identifier.c_str(), CI18N::hashToString(si.HashValue).c_str());
204 // nldebug("Comment = [%s]", si.Comments.toString().c_str());
205 // nldebug("Retrieving hash for %s as %s", si.Identifier.c_str(), CI18N::hashToString(si.HashValue).c_str());
207 stringInfos
.push_back(si
);
211 // check identifier uniqueness
// every identifier is looked up in the set 'unik'; a hit produces the "exist twice"
// warning, otherwise the identifier is recorded in the set.
215 set
<string
>::iterator it
;
216 for (uint i
=0; i
<stringInfos
.size(); ++i
)
218 it
= unik
.find(stringInfos
[i
].Identifier
);
219 if (it
!= unik
.end())
221 nlwarning("DT: loadStringFile : identifier '%s' exist twice", stringInfos
[i
].Identifier
.c_str() );
225 unik
.insert(stringInfos
[i
].Identifier
);
// Serialize a list of TStringInfo back into string-file text.
// For every entry:
//  - its comments (converted to UTF-8) are split into lines on "\n";
//  - lines containing "// DIFF " are erased when removeDiffComments is set;
//  - lines containing "// INDEX " or "// HASH_VALUE " are erased as well (the condition
//    guarding this is not visible in this excerpt; presumably tied to noDiffInfo -- confirm);
//  - the remaining lines are joined again and written back into si.Comments;
//  - when the identifier or the text is not empty, a "// HASH_VALUE <hash>" line, a
//    "// INDEX <position in strings>" line, the identifier, a tab and the text wrapped
//    in '[' ']' are appended, with a line break and a tab inserted after each literal
//    "\n" tag of the text.
// The function returns a ucstring built from UTF-8 with ucstring::makeFromUtf8.
236 ucstring
prepareStringFile(const vector
<TStringInfo
> &strings
, bool removeDiffComments
, bool noDiffInfo
)
240 vector
<TStringInfo
>::const_iterator
first(strings
.begin()), last(strings
.end());
241 for (; first
!= last
; ++first
)
244 const TStringInfo
&si
= *first
;
245 string comment
= si
.Comments
.toUtf8();
246 vector
<string
> lines
;
247 explode(comment
, string("\n"), lines
, true);
// NOTE(review): the loop below erases elements of 'lines' while indexing it with i.
// An erase shifts the following lines down by one, so the index has to be adjusted
// after each erase; such an adjustment is not visible in this excerpt -- confirm it exists.
250 for (i
=0; i
<lines
.size(); ++i
)
252 if (removeDiffComments
)
254 if (lines
[i
].find("// DIFF ") != string::npos
)
256 lines
.erase(lines
.begin()+i
);
261 if (lines
[i
].find("// INDEX ") != string::npos
)
263 lines
.erase(lines
.begin()+i
);
266 else if (lines
[i
].find("// HASH_VALUE ") != string::npos
)
268 lines
.erase(lines
.begin()+i
);
// rebuild the comment from the lines that were kept
274 for (i
=0; i
<lines
.size(); ++i
)
276 comment
+= lines
[i
] + "\n";
278 si
.Comments
= ucstring(comment
);
281 if (!si
.Identifier
.empty() || !si
.Text
.empty())
283 // add hash value comment if needed
284 // if (si.Comments.find(ucstring("// HASH_VALUE ")) == ucstring::npos)
287 str
+= "// HASH_VALUE " + CI18N::hashToString(si
.HashValue
) + "\n";
// NOTE(review): "%u" receives the difference of two iterators here; that difference
// type is not necessarily an unsigned int -- consider an explicit cast.
288 str
+= "// INDEX " + NLMISC::toString("%u", first
-strings
.begin()) + "\n";
290 str
+= si
.Identifier
+ '\t';
292 string text
= CI18N::makeMarkedString('[', ']', si
.Text
).toUtf8();
294 // add new line and tab after each \n tag
// text is consumed piece by piece: everything up to and including the two characters
// of the tag goes to text2 followed by "\n\t", then the search restarts on the rest.
295 string::size_type pos
;
296 while ((pos
= text
.find("\\n")) != string::npos
)
298 text2
+= text
.substr(0, pos
+2) + "\n\t";
299 text
= text
.substr(pos
+2);
301 text2
+= text
;//.substr(0, pos+2);
302 str
+= text2
+ "\n\n";
303 // str += CI18N::makeMarkedString('[', ']', si.Text) + nl + nl;
306 // nldebug("Adding string [%s]", str.toString().c_str());
310 return ucstring::makeFromUtf8(diff
);
314 bool readPhraseFile(const std::string
&filename
, vector
<TPhrase
> &phrases
, bool forceRehash
)
318 CI18N::readTextFile(filename
, doc
, false, true, CI18N::LINE_FMT_LF
);
320 return readPhraseFileFromString(doc
, filename
, phrases
, forceRehash
);
// Parse phrase definitions out of doc and append them to phrases.
// Shape of one phrase, as read by the visible code:
//   [comments] identifier '(' parameters ')' '{' clause... '}'
// and of one clause:
//   [comments] zero or more '(' condition ')' groups, an optional identifier, '[' text ']'
// Condition groups are concatenated into clause.Conditions, separated by one space.
// "#fileline" lines are skipped. A phrase holding only comments can also be pushed
// (the exact condition is not visible in this excerpt).
// phrase.HashValue is read from the comments when present; it is computed with
// makePhraseHash when forceRehash is set or no hash is found in the comments.
// Parse errors are reported through nlwarning with a line number obtained from countLine.
// A final pass warns about every phrase identifier that appears more than once.
// filename: presumably only used in the diagnostics (their arguments are not fully
// visible here) -- confirm.
// NOTE(review): the return statements are not visible in this excerpt; confirm the
// return contract against the complete file.
323 bool readPhraseFileFromString(ucstring
const& doc
, const std::string
&filename
, vector
<TPhrase
> &phrases
, bool forceRehash
)
// lastRead remembers the last phrase identifier parsed successfully
325 std::string
lastRead("nothing");
327 ucstring::const_iterator
first(doc
.begin()), last(doc
.end());
328 while (first
!= last
)
332 CI18N::skipWhiteSpace(first
, last
, &phrase
.Comments
);
336 if (!phrase
.Comments
.empty())
338 // push the resulting comment
339 phrases
.push_back(phrase
);
344 // try to read a #fileline preprocessor command
345 if (CI18N::matchToken("#fileline", first
, last
))
347 // for now, just skip
348 uint32 lineCounter
=0; // we count line another way
349 CI18N::skipLine(first
, last
, lineCounter
);
351 // begin parse of next line
355 if (!CI18N::parseLabel(first
, last
, phrase
.Identifier
))
357 uint32 line
= countLine(doc
, first
);
358 nlwarning("DT: In '%s' line %u: Error parsing phrase identifier after %s\n",
364 // nldebug("DT: parsing phrase '%s'", phrase.Identifier.c_str());
365 lastRead
= phrase
.Identifier
;
366 CI18N::skipWhiteSpace(first
, last
, &phrase
.Comments
);
// the parameter list is the text enclosed in '(' ')'
367 if (!CI18N::parseMarkedString('(', ')', first
, last
, phrase
.Parameters
))
369 uint32 line
= countLine(doc
, first
);
370 nlwarning("DT: in '%s', line %u: Error parsing parameter list for phrase %s\n",
373 phrase
.Identifier
.c_str());
376 CI18N::skipWhiteSpace(first
, last
, &phrase
.Comments
);
// the clause block must open with '{'
377 if (first
== last
|| *first
!= '{')
379 uint32 line
= countLine(doc
, first
);
380 nlwarning("DT: In '%s', line %u: Error parsing block opening '{' in phase %s\n",
383 phrase
.Identifier
.c_str());
// read clauses until the closing '}' or the end of the text
390 while (first
!= last
&& *first
!= '}')
393 // append the comment preread at previous pass
394 clause
.Comments
= temp
;
397 CI18N::skipWhiteSpace(first
, last
, &clause
.Comments
);
400 nlwarning("DT: Found end of file in non closed block for phrase %s\n", phrase
.Identifier
.c_str());
407 // skip the conditional expression
409 while (first
!= last
&& *first
== '(')
411 if (!CI18N::parseMarkedString('(', ')', first
, last
, cond
))
413 uint32 line
= countLine(doc
, first
);
// NOTE(review): the "clause %u" argument below is phrase.Clauses.size()+1, whose type
// is the container size type rather than unsigned int -- consider an explicit cast.
414 nlwarning("DT: In '%s' line %u: Error parsing conditional expression in phrase %s, clause %u\n",
417 phrase
.Identifier
.c_str(),
418 phrase
.Clauses
.size()+1);
422 // only prepend a space if required
423 if (!clause
.Conditions
.empty()) clause
.Conditions
+= " ";
425 clause
.Conditions
+= "(" + cond
+ ")";
426 CI18N::skipWhiteSpace(first
, last
, &clause
.Comments
);
431 nlwarning("DT: in '%s': Found end of file in non closed block for phrase %s\n",
433 phrase
.Identifier
.c_str());
436 // read the identifier (if any)
// the result of parseLabel is not tested here: the clause identifier is optional
437 CI18N::parseLabel(first
, last
, clause
.Identifier
);
// comments read at this point go to 'temp'; they are appended to this clause only
// when a text follows
438 CI18N::skipWhiteSpace(first
, last
, &temp
);
440 if (CI18N::parseMarkedString('[', ']', first
, last
, clause
.Text
))
442 // the last read comment is for this clause.
443 clause
.Comments
+= temp
;
448 uint32 line
= countLine(doc
, first
);
449 nlwarning("DT: in '%s' line %u: Error reading text for clause %u (%s) in phrase %s\n",
452 phrase
.Clauses
.size()+1,
453 clause
.Identifier
.c_str(),
454 phrase
.Identifier
.c_str());
459 phrase
.Clauses
.push_back(clause
);
461 CI18N::skipWhiteSpace(first
, last
);
462 if (first
== last
|| *first
!= '}')
464 uint32 line
= countLine(doc
, first
);
465 nlwarning("DT: in '%s' line %u: Missing block closing tag '}' in phrase %s\n",
468 phrase
.Identifier
.c_str());
473 // handle hash value.
474 if (forceRehash
|| !parseHashFromComment(phrase
.Comments
, phrase
.HashValue
))
476 // the hash is not in the comment, compute it.
477 phrase
.HashValue
= makePhraseHash(phrase
);
480 // the hash is perhaps in the comment
// when a "// HASH_VALUE" marker is found, the comment is cut at that position
481 ucstring::size_type pos
= phrase
.Comments
.find(ucstring("// HASH_VALUE"));
482 if (pos
!= ucstring::npos
)
484 phrase
.Comments
= phrase
.Comments
.substr(0, pos
);
489 // nldebug("DT : storing phrase '%s'", phrase.Identifier.c_str());
490 phrases
.push_back(phrase
);
493 // check identifier uniqueness
497 set
<string
>::iterator it
;
498 for (uint i
=0; i
<phrases
.size(); ++i
)
500 it
= unik
.find(phrases
[i
].Identifier
);
501 if (it
!= unik
.end())
503 nlwarning("DT: readPhraseFile : identifier '%s' exist twice", phrases
[i
].Identifier
.c_str() );
507 unik
.insert(phrases
[i
].Identifier
);
// Indentation helper used by preparePhraseFile on clause comments, conditions and text.
// The visible code builds 'tabs', a string made of nbTab tab characters, walks over
// every character of str (the loop body is not visible in this excerpt) and finally
// removes the tab characters found at the end of 'ret'.
// NOTE(review): the declaration of 'ret', the loop body and the return statement are
// not visible here; presumably 'ret' is str with 'tabs' inserted at each line start
// -- confirm against the complete file.
515 ucstring
tabLines(uint nbTab
, const ucstring
&str
)
520 for (uint i
=0; i
<nbTab
; ++i
)
521 tabs
.push_back('\t');
524 ucstring::const_iterator
first(str
.begin()), last(str
.end());
525 for (; first
!= last
; ++first
)
// NOTE(review): ret[ret.size()-1] is evaluated without checking that ret is not empty;
// if ret can be empty here (or becomes empty once its tabs are stripped) the index is
// out of range -- confirm that this cannot happen or add an emptiness test.
532 while (ret
[ret
.size()-1] == '\t')
533 ret
= ret
.substr(0, ret
.size()-1);
// Serialize a list of TPhrase back into phrase-file text.
// For every phrase:
//  - when removeDiffComments is set, its comments are split into lines on "\n", the lines
//    containing "// DIFF " are erased and the result is written back into p.Comments;
//  - when the identifier or the clause list is not empty, a "// HASH_VALUE <hash>" line is
//    added unless the comments already contain one, followed by "identifier (parameters)";
//  - each clause is then written: its comments and conditions go through tabLines(1, ...),
//    its text is wrapped in '[' ']', broken after each literal "\n" tag and indented with
//    tabLines(3, ...), and the line "<tab>identifier<tab>text" closes the clause.
// NOTE(review): the declaration of 'ret' and the return statement are not visible in
// this excerpt.
538 ucstring
preparePhraseFile(const vector
<TPhrase
> &phrases
, bool removeDiffComments
)
541 vector
<TPhrase
>::const_iterator
first(phrases
.begin()), last(phrases
.end());
542 for (; first
!= last
; ++first
)
544 const TPhrase
&p
= *first
;
546 if (removeDiffComments
)
// NOTE(review): the comment is converted with toString() here while prepareStringFile
// uses toUtf8() for the same job; check whether non-ASCII comments survive this path.
548 string comment
= p
.Comments
.toString();
549 vector
<string
> lines
;
550 explode(comment
, string("\n"), lines
, true);
// NOTE(review): elements are erased while 'lines' is indexed with i; the index has to be
// adjusted after each erase and such an adjustment is not visible here -- confirm.
553 for (i
=0; i
<lines
.size(); ++i
)
555 if (lines
[i
].find("// DIFF ") != string::npos
)
557 lines
.erase(lines
.begin()+i
);
563 for (i
=0; i
<lines
.size(); ++i
)
565 comment
+= lines
[i
] + "\n";
567 p
.Comments
= ucstring(comment
);
571 if (!p
.Identifier
.empty() || !p
.Clauses
.empty())
573 if (p
.Comments
.find(ucstring("// HASH_VALUE ")) == ucstring::npos
)
575 // add the hash value.
576 ret
+= ucstring("// HASH_VALUE ")+CI18N::hashToString(p
.HashValue
) + nl
;
578 ret
+= p
.Identifier
+ " ("+p
.Parameters
+ ")" + nl
;
581 for (uint i
=0; i
<p
.Clauses
.size(); ++i
)
583 const TClause
&c
= p
.Clauses
[i
];
584 if (!c
.Comments
.empty())
586 ucstring comment
= tabLines(1, c
.Comments
);
587 ret
+= comment
; // + '\n';
589 if (!c
.Conditions
.empty())
591 ucstring cond
= tabLines(1, c
.Conditions
);
595 // ucstring text = CI18N::makeMarkedString('[', ']', c.Text);
597 ucstring text
= CI18N::makeMarkedString('[', ']', c
.Text
);;
599 // add new line and tab after each \n tag
600 ucstring::size_type pos
;
601 const ucstring
nlTag("\\n");
602 while ((pos
= text
.find(nlTag
)) != ucstring::npos
)
604 text2
+= text
.substr(0, pos
+2) + nl
;
605 text
= text
.substr(pos
+2);
607 text2
+= text
;//.substr(0, pos+2);
// the text is indented with three tabs, then its first three characters are dropped
// again before it is appended after the clause identifier
611 text
= tabLines(3, text
);
613 text
= text
.substr(3);
614 ret
+= '\t' + c
.Identifier
+ '\t' + text
+ nl
+ nl
;
// Load a worksheet from a text file.
// The file is first opened through 'fp' to test that it can be reached (see the comment
// below about packed files); a debug message is logged when the open fails. The content
// is then read with CI18N::readTextFile and handed to readExcelSheet together with
// worksheet and checkUnique.
// NOTE(review): the declarations of 'fp' and 'str' and the return statements are not
// visible in this excerpt; confirm what is returned when the file cannot be opened.
624 bool loadExcelSheet(const string filename
, TWorksheet
&worksheet
, bool checkUnique
)
626 // Yoyo: must test with CIFile because can be packed into a .bnp on client...
628 if(!fp
.open(filename
))
630 nldebug("DT: Can't open file [%s]\n", filename
.c_str());
636 CI18N::readTextFile(filename
, str
, false, false, CI18N::LINE_FMT_LF
);
638 if (!readExcelSheet(str
, worksheet
, checkUnique
))
// Parse worksheet text and append its rows to worksheet.
// Steps performed by the visible code:
//  1. str is copied into a zero-terminated ucchar array (strArray);
//  2. the position of every line start is recorded in 'lines', using 'nl' as separator;
//  3. a first pass over the lines counts the cells and keeps the maximum in newColCount;
//  4. the worksheet is enlarged: columns are inserted until ColCount reaches newColCount
//     and lines.size() rows are added after the existing ones (startLine);
//  5. a second pass stores every cell with worksheet.setData(startLine + i, numCells, cell);
//  6. the identifiers are checked for uniqueness in the first column whose header neither
//     starts with '*' nor equals "DIFF_CMD"; duplicates produce a warning.
// A '"' found at the very beginning of a line starts a quoted field.
// NOTE(review): the use of checkUnique, several declarations (i, numCells, newColCount,
// cell, nameCol, unik) and the return statements are not visible in this excerpt.
644 bool readExcelSheet(const ucstring
&str
, TWorksheet
&worksheet
, bool checkUnique
)
649 // copy the str to a big ucchar array => Avoid allocation / free
650 vector
<ucchar
> strArray
;
652 strArray
.resize(str
.size()+1);
653 strArray
[strArray
.size()-1]= 0;
654 memcpy(&strArray
[0], &str
[0], str
.size()*sizeof(ucchar
));
656 // size of new line characters
657 size_t sizeOfNl
= nl
.length();
659 // **** Build array of lines. Just point to strArray, and fill 0 where appropriate
660 vector
<ucchar
*> lines
;
662 ucstring::size_type pos
= 0;
663 ucstring::size_type lastPos
= 0;
664 while ((pos
= str
.find(nl
, lastPos
)) != ucstring::npos
)
669 // nldebug("Found line : [%s]", ucstring(&strArray[lastPos]).toString().c_str());
670 lines
.push_back(&strArray
[lastPos
]);
672 lastPos
= pos
+ sizeOfNl
;
675 // Must add last line if no \n ending
676 if (lastPos
< str
.size())
680 // nldebug("Found line : [%s]", ucstring(&strArray[lastPos]).toString().c_str());
681 lines
.push_back(&strArray
[lastPos
]);
684 // nldebug("Found %u lines", lines.size());
686 // **** Do 2 passes: first count the cell number, then fill => avoid reallocation
689 for (i
=0; i
<lines
.size(); ++i
)
694 ucchar
*first
= lines
[i
];
695 for (; *first
!= 0; ++first
)
701 else if (*first
== '"' && first
==lines
[i
])
703 // read a quoted field.
705 while (*first
!= 0 && *first
!= '"' && *(first
+1) != 0 && *(first
+1) != '"')
708 if (*first
!= 0 && *first
== '"')
719 // take max cell of all lines
720 if (newColCount
!= max(newColCount
, numCells
))
722 newColCount
= max(newColCount
, numCells
);
723 nldebug("At line %u, numCol changed to %u",
729 // **** alloc / enlarge worksheet
730 // enlarge Worksheet column size, as needed
731 while (worksheet
.ColCount
< newColCount
)
732 worksheet
.insertColumn(worksheet
.ColCount
);
734 // enlarge Worksheet row size, as needed
735 uint startLine
= worksheet
.size();
736 worksheet
.resize(startLine
+ (uint
)lines
.size());
739 // **** fill worksheet
741 for (i
=0; i
<lines
.size(); ++i
)
747 ucchar
*first
= lines
[i
];
748 for (; *first
!= 0; ++first
)
752 // nldebug("Found cell [%s]", cell.toString().c_str());
753 worksheet
.setData(startLine
+ i
, numCells
, cell
);
757 else if (*first
== '"' && first
==lines
[i
])
759 // read a quoted field.
761 while (*first
!= 0 && *first
!= '"' && *(first
+1) != 0 && *(first
+1) != '"')
765 if (*first
!= 0 && *first
== '"')
777 // nldebug("Found cell [%s]", cell.toString().c_str());
779 worksheet
.setData(startLine
+ i
, numCells
, cell
);
781 nlassertex(numCells
<=newColCount
, ("readExcelSheet: bad row format: at line %u, the row has %u cell, max is %u", i
, numCells
, newColCount
));
782 // nldebug("Found %u cells in line %u", numCells, i);
786 // **** identifier uniqueness checking.
789 if (worksheet
.size() > 0)
791 // look for the first non '* tagged' or 'DIFF_CMD' column
// NOTE(review): begin() of the header cell is dereferenced without an emptiness test;
// an empty header cell would make this read invalid -- confirm headers are never empty.
793 while (nameCol
< worksheet
.ColCount
&& (*worksheet
.getData(0, nameCol
).begin() == uint16('*') || worksheet
.getData(0, nameCol
) == ucstring("DIFF_CMD")))
796 if (nameCol
< worksheet
.ColCount
)
798 // ok we can check uniqueness
801 set
<ucstring
>::iterator it
;
802 for (uint j
=0; j
<worksheet
.size(); ++j
)
804 it
= unik
.find(worksheet
.getData(j
, nameCol
));
805 if (it
!= unik
.end())
807 nlwarning("DT: readExcelSheet : identifier '%s' exist twice", worksheet
.getData(j
, nameCol
).toString().c_str() );
811 unik
.insert(worksheet
.getData(j
, nameCol
));
// Fill or normalise the "*HASH_VALUE" column of a worksheet. Nothing is done when the
// sheet has no data.
// The "*HASH_VALUE" header is searched in the first row.
//  - When forceRehash is set or the column does not exist: the column is inserted at
//    index 0 if it was missing, then for every data row (rows 1 and up) the cells of
//    columns 1 and up are concatenated and the hash of that text is written into cell 0
//    of the row. columnOk is computed from the headers that start neither with "*" nor
//    with "DIFF " (how it filters the concatenation is not visible in this excerpt;
//    presumably only flagged columns are hashed -- confirm).
//  - Otherwise (presumably the else branch -- confirm): a leading '_' is removed from
//    every non-empty cell of the existing hash column.
// NOTE(review): with forceRehash set and an existing "*HASH_VALUE" column that is not
// column 0, the hashes are still written into column 0 -- confirm this is intended.
822 void makeHashCode(TWorksheet
&sheet
, bool forceRehash
)
824 if (!sheet
.Data
.empty())
826 TWorksheet::TRow::iterator it
= find(sheet
.Data
[0].begin(), sheet
.Data
[0].end(), ucstring("*HASH_VALUE"));
827 if (forceRehash
|| it
== sheet
.Data
[0].end())
829 // we need to generate HASH_VALUE column !
830 if (it
== sheet
.Data
[0].end())
832 sheet
.insertColumn(0);
833 sheet
.Data
[0][0] = ucstring("*HASH_VALUE");
837 vector
<bool> columnOk
;
838 columnOk
.resize(sheet
.ColCount
, false);
839 for (uint k
=1; k
<sheet
.ColCount
; ++k
)
841 if (sheet
.Data
[0][k
].find(ucstring("*")) != 0 && sheet
.Data
[0][k
].find(ucstring("DIFF ")) != 0)
847 // make hash for each line
849 for (uint j
=1; j
<sheet
.Data
.size(); ++j
)
852 for (uint k
=1; k
<sheet
.ColCount
; ++k
)
856 str
+= sheet
.Data
[j
][k
];
859 uint64 hash
= CI18N::makeHash(str
);
860 CI18N::hashToUCString(hash
, sheet
.Data
[j
][0]);
// index of the existing "*HASH_VALUE" column
865 uint index
= (uint
)(it
- sheet
.Data
[0].begin());
866 for (uint j
=1; j
<sheet
.Data
.size(); ++j
)
868 ucstring
&field
= sheet
.Data
[j
][index
];
870 if (!field
.empty() && field
[0] == '_')
871 field
= field
.substr(1);
// Serialize a worksheet into text.
// The visible code leaves early when the worksheet has no data, sums the size of every
// cell (plus one per cell) to reserve the output buffer, flags the columns whose header
// is "*HASH_VALUE", then appends every cell of every row to 'text'.
// Cells of a hash column that belong to a data row, are not empty and do not start with
// '_' are treated specially (the statement is not visible in this excerpt; since
// makeHashCode strips a leading '_' from such cells, presumably a '_' prefix is written
// here -- confirm). Something is emitted after each cell except the last one of a row
// (presumably the column separator -- confirm).
// NOTE(review): the declarations of approxSize and text, the early-return value and the
// final return are not visible in this excerpt.
877 ucstring
prepareExcelSheet(const TWorksheet
&worksheet
)
879 if(worksheet
.Data
.empty())
882 // **** First pass: count approx the size
884 for (uint i
=0; i
<worksheet
.Data
.size(); ++i
)
886 for (uint j
=0; j
<worksheet
.Data
[i
].size(); ++j
)
888 approxSize
+= (uint
)worksheet
.Data
[i
][j
].size() + 1;
893 // Hash value for each column?
// NOTE(review): hashValue is sized from the first row but indexed below with the column
// index of every row; a row longer than the first one would index past its end --
// confirm that all rows have the same length.
894 vector
<bool> hashValue
;
895 hashValue
.resize(worksheet
.Data
[0].size());
896 for (uint j
=0; j
<worksheet
.Data
[0].size(); ++j
)
898 hashValue
[j
]= worksheet
.Data
[0][j
] == ucstring("*HASH_VALUE");
901 // **** Second pass: fill
903 text
.reserve(approxSize
*2);
904 for (uint i
=0; i
<worksheet
.Data
.size(); ++i
)
906 for (uint j
=0; j
<worksheet
.Data
[i
].size(); ++j
)
908 if (i
> 0 && hashValue
[j
] && (!worksheet
.Data
[i
][j
].empty() && worksheet
.Data
[i
][j
][0] != '_'))
910 text
+= worksheet
.Data
[i
][j
];
911 if (j
!= worksheet
.Data
[i
].size()-1)
926 } // namespace STRING_MANAGER