Bump for 3.6-28
[LibreOffice.git] / l10ntools / source / gsicheck.cxx
blob3a4c0af5d7ec95156729c6571a3507529f8c5f41
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*************************************************************************
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * Copyright 2000, 2010 Oracle and/or its affiliates.
8 * OpenOffice.org - a multi-platform office productivity suite
10 * This file is part of OpenOffice.org.
12 * OpenOffice.org is free software: you can redistribute it and/or modify
13 * it under the terms of the GNU Lesser General Public License version 3
14 * only, as published by the Free Software Foundation.
16 * OpenOffice.org is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser General Public License version 3 for more details
20 * (a copy is included in the LICENSE file that accompanied this code).
22 * You should have received a copy of the GNU Lesser General Public License
23 * version 3 along with OpenOffice.org. If not, see
24 * <http://www.openoffice.org/license.html>
25 * for a copy of the LGPLv3 License.
27 ************************************************************************/
29 #include "sal/config.h"
31 #include <algorithm>
32 #include <cassert>
33 #include <cstddef>
34 #include <fstream>
35 #include <string>
37 #include <stdio.h>
39 #include <rtl/strbuf.hxx>
40 #include "sal/main.h"
41 #include "helper.hxx"
42 #include "tagtest.hxx"
43 #include "gsicheck.hxx"
45 /*****************************************************************************/
46 void PrintMessage( rtl::OString const & aType, rtl::OString const & aMsg, rtl::OString const & aPrefix,
47 rtl::OString const & aContext, sal_Bool bPrintContext, std::size_t nLine, rtl::OString aUniqueId )
48 /*****************************************************************************/
50 fprintf( stdout, "%s %s, Line %u", aType.getStr(), aPrefix.getStr(), static_cast<unsigned>( nLine ) );
51 if ( !aUniqueId.isEmpty() )
52 fprintf( stdout, ", UniqueID %s", aUniqueId.getStr() );
53 fprintf( stdout, ": %s", aMsg.getStr() );
55 if ( bPrintContext )
56 fprintf( stdout, " \"%s\"", aContext.getStr() );
57 fprintf( stdout, "\n" );
60 /*****************************************************************************/
61 void PrintError( rtl::OString const & aMsg, rtl::OString const & aPrefix,
62 rtl::OString const & aContext, sal_Bool bPrintContext, std::size_t nLine, rtl::OString const & aUniqueId )
63 /*****************************************************************************/
65 PrintMessage( "Error:", aMsg, aPrefix, aContext, bPrintContext, nLine, aUniqueId );
68 bool LanguageOK( rtl::OString const & aLang )
70 sal_Int32 n = 0;
71 rtl::OString t0(aLang.getToken(0, '-', n));
72 if (n == -1) {
73 return !t0.isEmpty()
74 && (helper::isAllAsciiDigits(t0)
75 || helper::isAllAsciiLowerCase(t0));
77 rtl::OString t1(aLang.getToken(0, '-', n));
78 return n == -1
79 && !t0.isEmpty() && helper::isAllAsciiLowerCase(t0)
80 && !t1.isEmpty() && helper::isAllAsciiUpperCase(t1)
81 && !t0.equalsIgnoreAsciiCase(t1);
84 void LazyStream::LazyOpen()
86 if ( !bOpened )
88 open(aFileName.getStr(), std::ios_base::out | std::ios_base::trunc);
89 if (!is_open())
91 fprintf( stderr, "\nERROR: Could not open Output-File %s!\n\n",
92 aFileName.getStr() );
93 exit ( 4 );
95 bOpened = true;
101 // class GSILine
104 /*****************************************************************************/
105 GSILine::GSILine( const rtl::OString &rLine, std::size_t nLine )
106 /*****************************************************************************/
107 : nLineNumber( nLine )
108 , bOK( sal_True )
109 , bFixed ( sal_False )
110 , data_( rLine )
112 if (rLine.isEmpty()) {
113 NotOK();
114 return;
117 aFormat = FORMAT_SDF;
118 sal_Int32 n = 0;
119 aUniqId = rLine.getToken(0, '\t', n); // token 0
120 aUniqId += "/";
121 aUniqId += rLine.getToken(0, '\t', n); // token 1
122 aUniqId += "/";
123 aUniqId += rLine.getToken(1, '\t', n); // token 3
124 aUniqId += "/";
125 rtl::OString gid(rLine.getToken(0, '\t', n)); // token 4
126 aUniqId += gid;
127 aUniqId += "/";
128 rtl::OString lid(rLine.getToken(0, '\t', n)); // token 5
129 aUniqId += lid;
130 aUniqId += "/";
131 aUniqId += rLine.getToken(0, '\t', n); // token 6
132 aUniqId += "/";
133 aUniqId += rLine.getToken(0, '\t', n); // token 7
134 rtl::OString length(rLine.getToken(0, '\t', n)); // token 8
135 aLineType = rtl::OString();
136 aLangId = rLine.getToken(0, '\t', n); // token 9
137 aText = rLine.getToken(0, '\t', n); // token 10
138 aQuickHelpText = rLine.getToken(1, '\t', n); // token 12
139 aTitle = rLine.getToken(0, '\t', n); // token 13
140 if (n == -1) {
141 NotOK();
142 return;
144 rLine.getToken(0, '\t', n); // token 14
145 if (n != -1) {
146 NotOK();
147 return;
150 // do some more format checks here
151 if (!helper::isAllAsciiDigits(length)) {
152 PrintError(
153 "The length field does not contain a number!", "Line format",
154 length, true, GetLineNumber(), GetUniqId());
155 NotOK();
157 if (!LanguageOK(aLangId)) {
158 PrintError(
159 "The Language is invalid!", "Line format", aLangId, true,
160 GetLineNumber(), GetUniqId());
161 NotOK();
163 // Limit GID and LID to MAX_GID_LID_LEN chars each for database conformity,
164 // see #137575#:
165 if (gid.getLength() > MAX_GID_LID_LEN || lid.getLength() > MAX_GID_LID_LEN)
167 PrintError(
168 (rtl::OString(
169 RTL_CONSTASCII_STRINGPARAM("GID and LID may only be "))
170 + rtl::OString::valueOf(MAX_GID_LID_LEN)
171 + rtl::OString(RTL_CONSTASCII_STRINGPARAM(" chars long each"))),
172 "Line format", aLangId, true, GetLineNumber(), GetUniqId());
173 NotOK();
177 /*****************************************************************************/
178 void GSILine::NotOK()
179 /*****************************************************************************/
181 bOK = sal_False;
184 /*****************************************************************************/
185 void GSILine::ReassembleLine()
186 /*****************************************************************************/
188 if (GetLineFormat() != FORMAT_SDF) {
189 PrintError(
190 "Cannot reassemble line of unknown type (internal Error).",
191 "Line format", rtl::OString(), false, GetLineNumber(),
192 GetUniqId());
193 return;
195 rtl::OStringBuffer b;
196 sal_Int32 n = 0;
197 for (sal_Int32 i = 0; i != 10; ++i) {
198 b.append(data_.getToken(0, '\t', n)); // token 0--9
199 b.append('\t');
201 b.append(aText);
202 b.append('\t');
203 b.append(data_.getToken(1, '\t', n));
204 // token 11; should be empty but there are some places in sc not
205 // reflected to sources
206 b.append('\t');
207 b.append(aQuickHelpText);
208 b.append('\t');
209 b.append(aTitle);
210 b.append('\t');
211 b.append(data_.getToken(2, '\t', n)); // token 14
212 data_ = b.makeStringAndClear();
216 // class GSIBlock
218 /*****************************************************************************/
219 GSIBlock::GSIBlock( sal_Bool PbPrintContext, sal_Bool bSource, sal_Bool bTrans, sal_Bool bRef, sal_Bool bAllowSusp )
220 /*****************************************************************************/
221 : pSourceLine( NULL )
222 , pReferenceLine( NULL )
223 , bPrintContext( PbPrintContext )
224 , bCheckSourceLang( bSource )
225 , bCheckTranslationLang( bTrans )
226 , bReference( bRef )
227 , bAllowSuspicious( bAllowSusp )
228 , bHasBlockError( sal_False )
232 /*****************************************************************************/
233 GSIBlock::~GSIBlock()
234 /*****************************************************************************/
236 delete pSourceLine;
237 delete pReferenceLine;
239 for ( size_t i = 0, n = maList.size(); i < n; ++i )
240 delete maList[ i ];
241 maList.clear();
244 void GSIBlock::InsertLine( GSILine* pLine, const rtl::OString &rSourceLang)
246 if ( pLine->GetLanguageId() == rSourceLang )
248 if ( pSourceLine )
250 PrintError( "Source Language entry double. Treating as Translation.", "File format", "", pLine->GetLineNumber(), pLine->GetUniqId() );
251 bHasBlockError = sal_True;
252 pSourceLine->NotOK();
253 pLine->NotOK();
255 else
257 pSourceLine = pLine;
258 return;
262 if (!rSourceLang.isEmpty()) // only check blockstructure if source lang is given
264 for ( size_t nPos = 0, n = maList.size(); nPos < n; ++nPos )
266 if ( maList[ nPos ]->GetLanguageId() == pLine->GetLanguageId() )
268 PrintError( "Translation Language entry double. Checking both.", "File format", "", pLine->GetLineNumber(), pLine->GetUniqId() );
269 bHasBlockError = sal_True;
270 maList[ nPos ]->NotOK();
271 pLine->NotOK();
273 nPos++;
276 maList.push_back( pLine );
279 /*****************************************************************************/
280 void GSIBlock::SetReferenceLine( GSILine* pLine )
281 /*****************************************************************************/
283 pReferenceLine = pLine;
286 /*****************************************************************************/
287 void GSIBlock::PrintMessage( rtl::OString const & aType, rtl::OString const & aMsg, rtl::OString const & aPrefix,
288 rtl::OString const & aContext, std::size_t nLine, rtl::OString const & aUniqueId )
289 /*****************************************************************************/
291 ::PrintMessage( aType, aMsg, aPrefix, aContext, bPrintContext, nLine, aUniqueId );
294 /*****************************************************************************/
295 void GSIBlock::PrintError( rtl::OString const & aMsg, rtl::OString const & aPrefix,
296 rtl::OString const & aContext, std::size_t nLine, rtl::OString const & aUniqueId )
297 /*****************************************************************************/
299 PrintMessage( "Error:", aMsg, aPrefix, aContext, nLine, aUniqueId );
302 /*****************************************************************************/
303 void GSIBlock::PrintList( ParserMessageList *pList, rtl::OString const & aPrefix,
304 GSILine *pLine )
305 /*****************************************************************************/
307 for ( size_t i = 0 ; i < pList->size() ; i++ )
309 ParserMessage *pMsg = (*pList)[ i ];
310 rtl::OString aContext;
311 if ( bPrintContext )
313 if ( pMsg->GetTagBegin() == -1 )
314 aContext = pLine->GetText().copy( 0, 300 );
315 else
316 aContext = helper::abbreviate( pLine->data_, pMsg->GetTagBegin()-150, 300 );
317 aContext = aContext.trim();
320 PrintMessage( pMsg->Prefix(), pMsg->GetErrorText(), aPrefix, aContext, pLine->GetLineNumber(), pLine->GetUniqId() );
324 /*****************************************************************************/
325 sal_Bool GSIBlock::IsUTF8( const rtl::OString &aTestee, sal_Bool bFixTags, sal_Int32 &nErrorPos, rtl::OString &aErrorMsg, sal_Bool &bHasBeenFixed, rtl::OString &aFixed ) const
326 /*****************************************************************************/
328 rtl::OUString aUTF8Tester(
329 rtl::OStringToOUString(aTestee, RTL_TEXTENCODING_UTF8));
330 rtl::OString aTestee2(
331 rtl::OUStringToOString(aUTF8Tester, RTL_TEXTENCODING_UTF8));
332 sal_Int32 i = 0;
333 while (i != std::min(aTestee.getLength(), aTestee2.getLength())
334 && aTestee[i] == aTestee2[i])
336 ++i;
338 if (i != aTestee.getLength() || i != aTestee2.getLength())
340 aUTF8Tester = rtl::OUString(aTestee.getStr(), i, RTL_TEXTENCODING_UTF8);
341 nErrorPos = aUTF8Tester.getLength();
342 aErrorMsg = "UTF8 Encoding seems to be broken";
343 return sal_False;
346 nErrorPos = helper::indexOfAnyAsciiL(
347 aUTF8Tester,
348 RTL_CONSTASCII_STRINGPARAM(
349 "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0b\x0c\x0e\x0f\x10\x11\x12"
350 "\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\x7f"));
351 if (nErrorPos != -1)
353 aErrorMsg = "String contains illegal character";
354 return sal_False;
357 if ( bFixTags )
359 bHasBeenFixed = sal_False;
360 aFixed = rtl::OString();
363 return sal_True;
366 /*****************************************************************************/
367 sal_Bool GSIBlock::TestUTF8( GSILine* pTestee, sal_Bool bFixTags )
368 /*****************************************************************************/
370 sal_Int32 nErrorPos = 0;
371 rtl::OString aErrorMsg;
372 sal_Bool bError = sal_False;
373 rtl::OString aFixed;
374 sal_Bool bHasBeenFixed = sal_False;
375 if ( !IsUTF8( pTestee->GetText(), bFixTags, nErrorPos, aErrorMsg, bHasBeenFixed, aFixed ) )
377 rtl::OString aContext(copyUpTo(pTestee->GetText(), nErrorPos, 20));
378 PrintError(rtl::OStringBuffer(aErrorMsg).append(RTL_CONSTASCII_STRINGPARAM(" in Text at Position "))
379 .append(nErrorPos).getStr(),
380 "Text format", aContext, pTestee->GetLineNumber(), pTestee->GetUniqId());
381 bError = sal_True;
382 if ( bHasBeenFixed )
384 pTestee->SetText( aFixed );
385 pTestee->SetFixed();
388 if ( !IsUTF8( pTestee->GetQuickHelpText(), bFixTags, nErrorPos, aErrorMsg, bHasBeenFixed, aFixed ) )
390 rtl::OString aContext(
391 copyUpTo(pTestee->GetQuickHelpText(), nErrorPos, 20));
392 PrintError(rtl::OStringBuffer(aErrorMsg).append(RTL_CONSTASCII_STRINGPARAM(" in QuickHelpText at Position "))
393 .append(nErrorPos).getStr(),
394 "Text format", aContext, pTestee->GetLineNumber(), pTestee->GetUniqId());
395 bError = sal_True;
396 if ( bHasBeenFixed )
398 pTestee->SetQuickHelpText( aFixed );
399 pTestee->SetFixed();
402 if ( !IsUTF8( pTestee->GetTitle(), bFixTags, nErrorPos, aErrorMsg, bHasBeenFixed, aFixed ) )
404 rtl::OString aContext( pTestee->GetTitle().copy( nErrorPos, 20 ) );
405 PrintError(rtl::OStringBuffer(aErrorMsg).append(RTL_CONSTASCII_STRINGPARAM(" in Title at Position "))
406 .append(nErrorPos).getStr(),
407 "Text format", aContext, pTestee->GetLineNumber(), pTestee->GetUniqId());
408 bError = sal_True;
409 if ( bHasBeenFixed )
411 pTestee->SetTitle( aFixed );
412 pTestee->SetFixed();
415 if ( bError )
416 pTestee->NotOK();
417 return !bError;
421 /*****************************************************************************/
422 sal_Bool GSIBlock::HasSuspiciousChars( GSILine* pTestee, GSILine* pSource )
423 /*****************************************************************************/
425 sal_Int32 nPos = 0;
426 if ( !bAllowSuspicious && ( nPos = pTestee->GetText().indexOf("??")) != -1 )
427 if ( pSource->GetText().indexOf("??") == -1 )
429 rtl::OUString aUTF8Tester(
430 rtl::OStringToOUString(
431 pTestee->GetText().copy(0, nPos), RTL_TEXTENCODING_UTF8));
432 sal_Int32 nErrorPos = aUTF8Tester.getLength();
433 rtl::OString aContext( helper::abbreviate( pTestee->GetText(), nPos, 20 ) );
434 PrintError(rtl::OStringBuffer(RTL_CONSTASCII_STRINGPARAM("Found double questionmark in translation only. Looks like an encoding problem at Position "))
435 .append(nErrorPos).getStr(),
436 "Text format", aContext, pTestee->GetLineNumber(), pTestee->GetUniqId());
437 pTestee->NotOK();
438 return sal_True;
441 return sal_False;
445 /*****************************************************************************/
446 sal_Bool GSIBlock::CheckSyntax( std::size_t nLine, sal_Bool bRequireSourceLine, sal_Bool bFixTags )
447 /*****************************************************************************/
449 static LingTest aTester;
450 sal_Bool bHasError = sal_False;
452 if ( !pSourceLine )
454 if ( bRequireSourceLine )
456 PrintError( "No source language entry defined!", "File format", "", nLine );
457 bHasBlockError = sal_True;
460 else
462 aTester.CheckReference( pSourceLine );
463 if ( pSourceLine->HasMessages() )
465 PrintList( pSourceLine->GetMessageList(), "ReferenceString", pSourceLine );
466 pSourceLine->NotOK();
467 bHasError = sal_True;
470 if ( bReference )
472 if ( !pReferenceLine )
474 GSILine *pSource;
475 if ( pSourceLine )
476 pSource = pSourceLine;
477 else
478 pSource = maList.empty() ? NULL : maList[ 0 ]; // get some other line
479 if ( pSource )
480 PrintError( "No reference line found. Entry is new in source file", "File format", "", pSource->GetLineNumber(), pSource->GetUniqId() );
481 else
482 PrintError( "No reference line found. Entry is new in source file", "File format", "", nLine );
483 bHasBlockError = sal_True;
485 else
487 if ( pSourceLine && pSourceLine->data_ != pReferenceLine->data_ )
489 sal_Int32 nPos = pSourceLine->data_.indexOf( pReferenceLine->data_ );
490 rtl::OStringBuffer aContext( pReferenceLine->data_.copy( nPos - 5, 15) );
491 aContext.append( "\" --> \"" ).append( pSourceLine->data_.copy( nPos - 5, 15) );
492 PrintError( "Source Language Entry has changed.", "File format", aContext.makeStringAndClear(), pSourceLine->GetLineNumber(), pSourceLine->GetUniqId() );
493 pSourceLine->NotOK();
494 bHasError = sal_True;
499 if ( pSourceLine )
500 bHasError |= !TestUTF8( pSourceLine, bFixTags );
502 for ( size_t i = 0, n = maList.size(); i < n; ++i )
504 GSILine* pItem = maList[ i ];
505 aTester.CheckTestee( pItem, pSourceLine != NULL, bFixTags );
506 if ( pItem->HasMessages() || aTester.HasCompareWarnings() )
508 if ( pItem->HasMessages() || aTester.GetCompareWarnings().HasErrors() )
509 pItem->NotOK();
510 bHasError = sal_True;
511 PrintList( pItem->GetMessageList(), "Translation", pItem );
512 PrintList( &(aTester.GetCompareWarnings()), "Translation Tag Mismatch", pItem );
514 bHasError |= !TestUTF8( pItem, bFixTags );
515 if ( pSourceLine )
516 bHasError |= HasSuspiciousChars( pItem, pSourceLine );
519 return bHasError || bHasBlockError;
522 void GSIBlock::WriteError( LazyStream &aErrOut, sal_Bool bRequireSourceLine )
524 if ( pSourceLine && pSourceLine->IsOK() && bCheckSourceLang && !bHasBlockError )
525 return;
527 sal_Bool bHasError = sal_False;
528 sal_Bool bCopyAll = ( !pSourceLine && bRequireSourceLine ) || ( pSourceLine && !pSourceLine->IsOK() && !bCheckTranslationLang ) || bHasBlockError;
529 for ( size_t i = 0, n = maList.size(); i < n; ++i )
531 GSILine* pItem = maList[ i ];
532 if ( !pItem->IsOK() || bCopyAll )
534 bHasError = sal_True;
535 aErrOut.LazyOpen();
536 aErrOut << pItem->data_.getStr() << '\n';
540 if ( pSourceLine && ( bHasError || !pSourceLine->IsOK() ) && !( !bHasError && bCheckTranslationLang ) )
542 aErrOut.LazyOpen();
543 aErrOut << pSourceLine->data_.getStr() << '\n';
547 void GSIBlock::WriteCorrect( LazyStream &aOkOut, sal_Bool bRequireSourceLine )
549 if ( ( !pSourceLine && bRequireSourceLine ) || ( pSourceLine && !pSourceLine->IsOK() && !bCheckTranslationLang ) )
550 return;
552 sal_Bool bHasOK = sal_False;
553 for ( size_t i = 0, n = maList.size(); i < n; ++i )
555 GSILine* pItem = maList[ i ];
556 if ( ( pItem->IsOK() || bCheckSourceLang ) && !bHasBlockError )
558 bHasOK = sal_True;
559 aOkOut.LazyOpen();
560 aOkOut << pItem->data_.getStr() << '\n';
564 if ( ( pSourceLine && pSourceLine->IsOK() && ( !maList.empty() || !bCheckTranslationLang ) ) || ( bHasOK && bCheckTranslationLang ) )
566 aOkOut.LazyOpen();
567 aOkOut << pSourceLine->data_.getStr() << '\n';
571 void GSIBlock::WriteFixed( LazyStream &aFixOut )
573 if ( pSourceLine && !pSourceLine->IsFixed() && bCheckSourceLang )
574 return;
576 sal_Bool bHasFixes = sal_False;
577 for ( size_t i = 0, n = maList.size(); i < n; ++i )
579 GSILine* pItem = maList[ i ];
580 if ( pItem->IsFixed() )
582 bHasFixes = sal_True;
583 aFixOut.LazyOpen();
584 aFixOut << pItem->data_.getStr() << '\n';
588 if ( pSourceLine && ( bHasFixes || pSourceLine->IsFixed() ) )
590 aFixOut.LazyOpen();
591 aFixOut << pSourceLine->data_.getStr() << '\n';
595 sal_Bool check(rtl::OString s, std::size_t nLine)
597 sal_Bool bFileHasError = sal_False;
598 GSILine* pGSILine = NULL;
599 GSIBlock *pBlock = NULL;
600 pGSILine = new GSILine(s, nLine );
602 if ( !pGSILine->data_.isEmpty() )
604 if ( FORMAT_UNKNOWN == pGSILine->GetLineFormat() )
606 PrintError( "Format of line is unknown. Ignoring!", "Line format", pGSILine->data_.copy( 0,40 ), sal_True, pGSILine->GetLineNumber() );
607 bFileHasError = sal_True;
608 pGSILine->NotOK();
610 else if ( !(pGSILine->GetLineType().equalsIgnoreAsciiCaseL(RTL_CONSTASCII_STRINGPARAM("res-comment"))) )
612 pBlock = new GSIBlock( sal_True, sal_False, sal_False, sal_False, sal_False );
613 pBlock->InsertLine( pGSILine, rtl::OString() );
614 bFileHasError |= pBlock->CheckSyntax( nLine, sal_False, sal_False );
615 if (pBlock!=NULL)
617 delete pBlock;
618 pBlock=NULL;
619 pGSILine=NULL;
623 delete pGSILine;
624 if ( bFileHasError )
625 return false;
626 else
627 return true;
630 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */