Bump for 3.6-28
[LibreOffice.git] / l10ntools / source / tagtest.cxx
blob062c5c78a5838cc166ff84aaf0008b2064b8ac76
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*************************************************************************
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * Copyright 2000, 2010 Oracle and/or its affiliates.
8 * OpenOffice.org - a multi-platform office productivity suite
10 * This file is part of OpenOffice.org.
12 * OpenOffice.org is free software: you can redistribute it and/or modify
13 * it under the terms of the GNU Lesser General Public License version 3
14 * only, as published by the Free Software Foundation.
16 * OpenOffice.org is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser General Public License version 3 for more details
20 * (a copy is included in the LICENSE file that accompanied this code).
22 * You should have received a copy of the GNU Lesser General Public License
23 * version 3 along with OpenOffice.org. If not, see
24 * <http://www.openoffice.org/license.html>
25 * for a copy of the LGPLv3 License.
27 ************************************************************************/
29 #include "sal/config.h"
31 #include "rtl/strbuf.hxx"
32 #include "rtl/string.hxx"
33 #include "rtl/ustrbuf.hxx"
34 #include "rtl/ustring.hxx"
36 #include "tagtest.hxx"
38 #if OSL_DEBUG_LEVEL > 1
39 #include <stdio.h>
40 #endif
42 #include "gsicheck.hxx"
43 #include "helper.hxx"
// Bit-flag helpers.
// Every macro argument is wrapped in parentheses so that a compound argument
// such as `A | B` is treated as one operand. Without them RESET_FLAG( n, A | B )
// would expand to `n &= ~A | B` and HAS_FLAG( n, A | B ) to `( n & A | B )`.
#define HAS_FLAG( nFlags, nFlag )   ( ( (nFlags) & (nFlag) ) != 0 )   // true if any bit of nFlag is set in nFlags
#define SET_FLAG( nFlags, nFlag )   ( (nFlags) |= (nFlag) )           // switch the bits of nFlag on
#define RESET_FLAG( nFlags, nFlag ) ( (nFlags) &= ~(nFlag) )          // switch the bits of nFlag off; ~ = bitwise NOT
51 TokenInfo::TokenInfo( TokenId pnId, sal_Int32 nP, rtl::OUString const & paStr, ParserMessageList &rErrorList )
52 : bClosed(sal_False)
53 , bCloseTag(sal_False)
54 , bIsBroken(sal_False)
55 , bHasBeenFixed(sal_False)
56 , bDone(sal_False)
57 , aTokenString( paStr )
58 , nId( pnId )
59 , nPos(nP)
61 if ( nId == TAG_COMMONSTART || nId == TAG_COMMONEND )
62 SplitTag( rErrorList );
// States of the state machine that TokenInfo::SplitTag() runs over a
// \< ... \> tag. TC_FINISHED and TC_ERROR terminate the machine.
enum tagcheck
{
    TC_START,
    TC_HAS_TAG_NAME,
    TC_HAS_PROP_NAME_EQ,
    TC_HAS_PROP_NAME_EQ_SP,
    TC_HAS_PROP_NAME_SP,
    TC_INSIDE_STRING,
    TC_PROP_FINISHED,
    TC_CLOSED,
    TC_CLOSED_SPACE,
    TC_CLOSETAG,
    TC_CLOSETAG_HAS_TAG_NAME,
    TC_FINISHED,
    TC_ERROR
};
/*
    State transitions used by TokenInfo::SplitTag(), shown for a tag such as
                                                      \<  link  href  =  \"text\"  name  =  \"C\"  \>

    START               ' ' ->  HAS_TAG_NAME
    START               '/' ->  CLOSED                  - with Portion (tag name in front of the /)
    START               '/' ->  CLOSETAG                - no Portion (starting with /)
    START               '>' ->  FINISHED
    HAS_TAG_NAME        '=' ->  HAS_PROP_NAME_EQ
    HAS_TAG_NAME        ' ' ->  HAS_PROP_NAME_SP
    HAS_TAG_NAME        '/' ->  CLOSED
    HAS_TAG_NAME        '>' ->  FINISHED
    HAS_PROP_NAME_SP    '=' ->  HAS_PROP_NAME_EQ
    HAS_PROP_NAME_EQ    ' ' ->  HAS_PROP_NAME_EQ_SP
    HAS_PROP_NAME_EQ    '"' ->  INSIDE_STRING
    HAS_PROP_NAME_EQ_SP '"' ->  INSIDE_STRING
    INSIDE_STRING       ' ' ->  INSIDE_STRING
    INSIDE_STRING       '=' ->  INSIDE_STRING
    INSIDE_STRING       '>' ->  INSIDE_STRING
    INSIDE_STRING       '"' ->  PROP_FINISHED
    PROP_FINISHED       ' ' ->  HAS_TAG_NAME
    PROP_FINISHED       '/' ->  CLOSED
    PROP_FINISHED       '>' ->  FINISHED
    CLOSED              ' ' ->  CLOSED_SPACE
    CLOSED              '>' ->  FINISHED
    CLOSED_SPACE        '>' ->  FINISHED

    CLOSETAG            ' ' ->  CLOSETAG_HAS_TAG_NAME
    CLOSETAG            '>' ->  FINISHED
    CLOSETAG_HAS_TAG_NAME '>' -> FINISHED
*/
97 void TokenInfo::SplitTag( ParserMessageList &rErrorList )
99 sal_Int32 nLastPos = 2; // skip initial \<
100 sal_Int32 nCheckPos = nLastPos;
101 static char const aDelims[] = " \\=>/";
102 rtl::OUString aPortion;
103 rtl::OUString aValue; // store the value of a property
104 rtl::OString aName; // store the name of a property/tag
105 sal_Bool bCheckName = sal_False;
106 sal_Bool bCheckEmpty = sal_False;
107 sal_Unicode cDelim;
108 tagcheck aState = TC_START;
110 // skip blanks
111 while ( nLastPos < aTokenString.getLength() && aTokenString[nLastPos] == ' ')
112 nLastPos++;
114 nCheckPos = helper::indexOfAnyAsciiL(
115 aTokenString, RTL_CONSTASCII_STRINGPARAM(aDelims), nLastPos);
116 while ( nCheckPos != -1 && !( aState == TC_FINISHED || aState == TC_ERROR ) )
118 aPortion = aTokenString.copy( nLastPos, nCheckPos-nLastPos );
120 if ( aTokenString[nCheckPos] == '\\' )
121 nCheckPos++;
123 cDelim = aTokenString[nCheckPos];
124 nCheckPos++;
126 switch ( aState )
128 // START ' ' -> HAS_TAG_NAME
129 // START '/' -> CLOSED
130 // START '>' -> FINISHED
131 case TC_START:
132 aTagName = aPortion;
133 switch ( cDelim )
135 case ' ': aState = TC_HAS_TAG_NAME;
136 bCheckName = sal_True;
137 break;
138 case '/':
140 if (aPortion.isEmpty())
142 aState = TC_CLOSETAG;
144 else
146 aState = TC_CLOSED;
147 bCheckName = sal_True;
150 break;
151 case '>': aState = TC_FINISHED;
152 bCheckName = sal_True;
153 break;
154 default: aState = TC_ERROR;
156 break;
158 // HAS_TAG_NAME '=' -> HAS_PROP_NAME_EQ
159 // HAS_TAG_NAME ' ' -> HAS_PROP_NAME_SP
160 // HAS_TAG_NAME '/' -> CLOSED
161 // HAS_TAG_NAME '>' -> FINISHED
162 case TC_HAS_TAG_NAME:
163 switch ( cDelim )
165 case '=': aState = TC_HAS_PROP_NAME_EQ;
166 bCheckName = sal_True;
167 break;
168 case ' ': aState = TC_HAS_PROP_NAME_SP;
169 bCheckName = sal_True;
170 break;
171 case '/': aState = TC_CLOSED;
172 bCheckEmpty = sal_True;
173 break;
174 case '>': aState = TC_FINISHED;
175 bCheckEmpty = sal_True;
176 break;
177 default: aState = TC_ERROR;
179 break;
181 // HAS_PROP_NAME_SP '=' -> HAS_PROP_NAME_EQ
182 case TC_HAS_PROP_NAME_SP:
183 switch ( cDelim )
185 case '=': aState = TC_HAS_PROP_NAME_EQ;
186 bCheckEmpty = sal_True;
187 break;
188 default: aState = TC_ERROR;
190 break;
192 // HAS_PROP_NAME_EQ ' ' -> HAS_PROP_NAME_EQ_SP
193 // HAS_PROP_NAME_EQ '"' -> INSIDE_STRING
194 case TC_HAS_PROP_NAME_EQ:
195 switch ( cDelim )
197 case ' ': aState = TC_HAS_PROP_NAME_EQ_SP;
198 bCheckEmpty = sal_True;
199 break;
200 case '\"': aState = TC_INSIDE_STRING;
201 bCheckEmpty = sal_True;
202 aValue = rtl::OUString();
203 break;
204 default: aState = TC_ERROR;
206 break;
208 // HAS_PROP_NAME_EQ_SP '"' -> INSIDE_STRING
209 case TC_HAS_PROP_NAME_EQ_SP:
210 switch ( cDelim )
212 case '\"': aState = TC_INSIDE_STRING;
213 bCheckEmpty = sal_True;
214 aValue = rtl::OUString();
215 break;
216 default: aState = TC_ERROR;
218 break;
220 // INSIDE_STRING * -> INSIDE_STRING
221 // INSIDE_STRING '"' -> PROP_FINISHED
222 case TC_INSIDE_STRING:
223 switch ( cDelim )
225 case '\"':
227 aState = TC_PROP_FINISHED;
228 aValue += aPortion;
229 if ( aProperties.find( aName ) == aProperties.end() )
231 if ( !IsPropertyValueValid( aName, aValue ) )
233 rErrorList.AddError( 25, rtl::OStringBuffer(RTL_CONSTASCII_STRINGPARAM("Property '")).append(aName).append(RTL_CONSTASCII_STRINGPARAM("' has invalid value '")).append(rtl::OUStringToOString(aValue, RTL_TEXTENCODING_UTF8)).append("' ").makeStringAndClear(), *this );
234 bIsBroken = sal_True;
236 aProperties[ aName ] = aValue;
238 else
240 rErrorList.AddError( 25, rtl::OStringBuffer(RTL_CONSTASCII_STRINGPARAM("Property '")).append(aName).append(RTL_CONSTASCII_STRINGPARAM("' defined twice ")).makeStringAndClear(), *this );
241 bIsBroken = sal_True;
244 break;
245 default:
247 aState = TC_INSIDE_STRING;
248 aValue += aPortion;
249 aValue += rtl::OUString(cDelim);
252 break;
254 // PROP_FINISHED ' ' -> HAS_TAG_NAME
255 // PROP_FINISHED '/' -> CLOSED
256 // PROP_FINISHED '>' -> FINISHED
257 case TC_PROP_FINISHED:
258 switch ( cDelim )
260 case ' ': aState = TC_HAS_TAG_NAME;
261 bCheckEmpty = sal_True;
262 break;
263 case '/': aState = TC_CLOSED;
264 bCheckEmpty = sal_True;
265 break;
266 case '>': aState = TC_FINISHED;
267 bCheckEmpty = sal_True;
268 break;
269 default: aState = TC_ERROR;
271 break;
273 // CLOSED ' ' -> CLOSED_SPACE
274 // CLOSED '>' -> FINISHED
275 case TC_CLOSED:
276 switch ( cDelim )
278 case ' ': aState = TC_CLOSED_SPACE;
279 bCheckEmpty = sal_True;
280 bClosed = sal_True;
281 break;
282 case '>': aState = TC_FINISHED;
283 bCheckEmpty = sal_True;
284 break;
285 default: aState = TC_ERROR;
287 break;
289 // CLOSED_SPACE '>' -> FINISHED
290 case TC_CLOSED_SPACE:
291 switch ( cDelim )
293 case '>': aState = TC_FINISHED;
294 bCheckEmpty = sal_True;
295 break;
296 default: aState = TC_ERROR;
298 break;
300 // CLOSETAG ' ' -> CLOSETAG_HAS_TAG_NAME
301 // CLOSETAG '>' -> FINISHED
302 case TC_CLOSETAG:
303 bCloseTag = sal_True;
304 switch ( cDelim )
306 case ' ': aState = TC_CLOSETAG_HAS_TAG_NAME;
307 aTagName = aPortion;
308 bCheckName = sal_True;
309 break;
310 case '>': aState = TC_FINISHED;
311 aTagName = aPortion;
312 bCheckName = sal_True;
313 break;
314 default: aState = TC_ERROR;
316 break;
318 // CLOSETAG_HAS_TAG_NAME '>' -> FINISHED
319 case TC_CLOSETAG_HAS_TAG_NAME:
320 switch ( cDelim )
322 case '>': aState = TC_FINISHED;
323 bCheckEmpty = sal_True;
324 break;
325 default: aState = TC_ERROR;
327 break;
330 default: rErrorList.AddError( 99, "Internal error Parsing Tag ", *this );
331 bIsBroken = sal_True;
335 if ( bCheckName )
337 if (aPortion.isEmpty())
339 rErrorList.AddError( 25, "Tag/Property name missing ", *this );
340 bIsBroken = sal_True;
342 else
344 aName = rtl::OUStringToOString(aPortion, RTL_TEXTENCODING_UTF8);
345 // "a-zA-Z_-.0-9"
346 sal_Bool bBroken = sal_False;
347 const sal_Char* aBuf = aName.getStr();
348 for (sal_Int32 nCount = 0 ; !bBroken && nCount < aName.getLength() ; ++nCount)
350 bBroken = ! ( ( aBuf[nCount] >= 'a' && aBuf[nCount] <= 'z' )
351 ||( aBuf[nCount] >= 'A' && aBuf[nCount] <= 'Z' )
352 ||( aBuf[nCount] >= '0' && aBuf[nCount] <= '9' )
353 ||( aBuf[nCount] == '_' )
354 ||( aBuf[nCount] == '-' )
355 ||( aBuf[nCount] == '.' )
359 if ( bBroken )
361 rErrorList.AddError( 25, "Found illegal character in Tag/Property name ", *this );
362 bIsBroken = sal_True;
366 bCheckName = sal_False;
369 if ( bCheckEmpty )
371 if (!aPortion.isEmpty())
373 rErrorList.AddError( 25, rtl::OStringBuffer(RTL_CONSTASCII_STRINGPARAM("Found displaced characters '")).append(rtl::OUStringToOString(aPortion, RTL_TEXTENCODING_UTF8)).append(RTL_CONSTASCII_STRINGPARAM("' in Tag ")).makeStringAndClear(), *this );
374 bIsBroken = sal_True;
376 bCheckEmpty = sal_False;
380 nLastPos = nCheckPos;
382 // skip further blanks
383 if ( cDelim == ' ' && aState != TC_INSIDE_STRING )
384 while ( nLastPos < aTokenString.getLength() && aTokenString[nLastPos] == ' ')
385 nLastPos++;
387 nCheckPos = helper::indexOfAnyAsciiL(
388 aTokenString, RTL_CONSTASCII_STRINGPARAM(aDelims), nLastPos);
390 if ( aState != TC_FINISHED )
392 rErrorList.AddError( 25, "Parsing error in Tag ", *this );
393 bIsBroken = sal_True;
397 sal_Bool TokenInfo::IsPropertyRelevant( const rtl::OString &rName, const rtl::OUString &rValue ) const
399 if ( aTagName == "alt" && rName.equalsL(RTL_CONSTASCII_STRINGPARAM("xml-lang")) )
400 return sal_False;
401 if ( aTagName == "ahelp" && rName.equalsL(RTL_CONSTASCII_STRINGPARAM("visibility")) && rValue == "visible" )
402 return sal_False;
403 if ( aTagName == "image" && (rName.equalsL(RTL_CONSTASCII_STRINGPARAM("width")) || rName.equalsL(RTL_CONSTASCII_STRINGPARAM("height"))) )
404 return sal_False;
406 return sal_True;
409 sal_Bool TokenInfo::IsPropertyValueValid( const rtl::OString &rName, const rtl::OUString &rValue ) const
411 /* removed due to i56740
412 if ( aTagName.EqualsAscii( "switchinline" ) && rName.equalsL(RTL_CONSTASCII_STRINGPARAM("select")) )
414 return rValue.EqualsAscii("sys") ||
415 rValue.EqualsAscii("appl") ||
416 rValue.EqualsAscii("distrib");
417 } */
418 if ( aTagName == "caseinline" && rName.equalsL(RTL_CONSTASCII_STRINGPARAM("select")) )
420 return !rValue.isEmpty();
423 // we don't know any better so we assume it to be OK
424 return sal_True;
427 sal_Bool TokenInfo::IsPropertyInvariant( const rtl::OString &rName, const rtl::OUString &rValue ) const
429 if ( aTagName == "link" && rName.equalsL(RTL_CONSTASCII_STRINGPARAM("name")) )
430 return sal_False;
431 if ( aTagName == "link" && rName.equalsL(RTL_CONSTASCII_STRINGPARAM("href")) )
432 { // check for external reference
433 return
434 !(rValue.matchIgnoreAsciiCaseAsciiL(
435 RTL_CONSTASCII_STRINGPARAM("http:"))
436 || rValue.matchIgnoreAsciiCaseAsciiL(
437 RTL_CONSTASCII_STRINGPARAM("https:"))
438 || rValue.matchIgnoreAsciiCaseAsciiL(
439 RTL_CONSTASCII_STRINGPARAM("ftp:")));
441 return sal_True;
444 sal_Bool TokenInfo::IsPropertyFixable( const rtl::OString &rName ) const
446 // name everything that is allowed to be fixed automatically here
447 if ( (aTagName == "ahelp" && rName.equalsL(RTL_CONSTASCII_STRINGPARAM("hid")))
448 || (aTagName == "link" && rName.equalsL(RTL_CONSTASCII_STRINGPARAM("href")))
449 || (aTagName == "alt" && rName.equalsL(RTL_CONSTASCII_STRINGPARAM("id")))
450 || (aTagName == "variable" && rName.equalsL(RTL_CONSTASCII_STRINGPARAM("id")))
451 || (aTagName == "image" && rName.equalsL(RTL_CONSTASCII_STRINGPARAM("src")))
452 || (aTagName == "image" && rName.equalsL(RTL_CONSTASCII_STRINGPARAM("id")) ))
453 return sal_True;
454 return sal_False;
457 sal_Bool TokenInfo::MatchesTranslation( TokenInfo& rInfo, sal_Bool bGenErrors, ParserMessageList &rErrorList, sal_Bool bFixTags ) const
459 // check if tags are equal
460 // check if all existing properties are in the translation as well and
461 // whether they have a matching content (the same in most cases)
463 if ( nId != rInfo.nId )
464 return sal_False;
466 if ( aTagName != rInfo.aTagName )
467 return sal_False;
469 // If one of the tags has formating errors already it does make no sense to check here, so return right away
470 if ( bGenErrors && ( bIsBroken || rInfo.bIsBroken ) )
471 return sal_True;
473 StringHashMap::const_iterator iProp;
474 for( iProp = aProperties.begin() ; iProp != aProperties.end(); ++iProp )
476 if ( rInfo.aProperties.find( iProp->first ) != rInfo.aProperties.end() )
478 if ( IsPropertyRelevant( iProp->first, iProp->second ) || IsPropertyRelevant( iProp->first, rInfo.aProperties.find( iProp->first )->second ) )
480 if ( IsPropertyInvariant( iProp->first, iProp->second ) )
482 if ( rInfo.aProperties.find( iProp->first )->second != iProp->second )
484 if ( bGenErrors )
486 if ( bFixTags && IsPropertyFixable( iProp->first ) )
488 rInfo.aProperties.find( iProp->first )->second = iProp->second;
489 rInfo.SetHasBeenFixed();
490 rErrorList.AddWarning( 25, rtl::OStringBuffer(RTL_CONSTASCII_STRINGPARAM("Property '")).append(iProp->first).append(RTL_CONSTASCII_STRINGPARAM("': FIXED different value in Translation ")).makeStringAndClear(), *this );
492 else
493 rErrorList.AddError( 25, rtl::OStringBuffer(RTL_CONSTASCII_STRINGPARAM("Property '")).append(iProp->first).append(RTL_CONSTASCII_STRINGPARAM("': value different in Translation ")).makeStringAndClear(), *this );
495 else return sal_False;
500 else
502 if ( IsPropertyRelevant( iProp->first, iProp->second ) )
504 if ( bGenErrors )
505 rErrorList.AddError( 25, rtl::OStringBuffer(RTL_CONSTASCII_STRINGPARAM("Property '")).append(iProp->first).append(RTL_CONSTASCII_STRINGPARAM("' missing in Translation ")).makeStringAndClear(), *this );
506 else return sal_False;
510 for( iProp = rInfo.aProperties.begin() ; iProp != rInfo.aProperties.end(); ++iProp )
512 if ( aProperties.find( iProp->first ) == aProperties.end() )
514 if ( IsPropertyRelevant( iProp->first, iProp->second ) )
516 if ( bGenErrors )
517 rErrorList.AddError( 25, rtl::OStringBuffer(RTL_CONSTASCII_STRINGPARAM("Extra Property '")).append(iProp->first).append(RTL_CONSTASCII_STRINGPARAM("' in Translation ")).makeStringAndClear(), rInfo );
518 else return sal_False;
523 // if we reach here eather
524 // the tags match completely or
525 // the tags match but not the properties and we generated errors for that
526 return sal_True;
529 rtl::OUString TokenInfo::GetTagName() const
531 return aTagName;
534 rtl::OUString TokenInfo::MakeTag() const
536 rtl::OUStringBuffer aRet;
537 aRet.appendAscii("\\<");
538 if ( bCloseTag )
539 aRet.appendAscii("/");
540 aRet.append( GetTagName() );
541 StringHashMap::const_iterator iProp;
543 for( iProp = aProperties.begin() ; iProp != aProperties.end(); ++iProp )
545 aRet.appendAscii(" ");
546 aRet.append( rtl::OStringToOUString( iProp->first, RTL_TEXTENCODING_UTF8 ) );
547 aRet.appendAscii("=\\\"");
548 aRet.append( iProp->second );
549 aRet.appendAscii("\\\"");
551 if ( bClosed )
552 aRet.appendAscii("/");
553 aRet.appendAscii("\\>");
554 return aRet.makeStringAndClear();
558 void ParserMessageList::AddError( sal_Int32 nErrorNr, const rtl::OString& rErrorText, const TokenInfo &rTag )
560 maList.push_back( new ParserError( nErrorNr, rErrorText, rTag ) );
563 void ParserMessageList::AddWarning( sal_Int32 nErrorNr, const rtl::OString& rErrorText, const TokenInfo &rTag )
565 maList.push_back( new ParserWarning( nErrorNr, rErrorText, rTag ) );
568 sal_Bool ParserMessageList::HasErrors()
570 for ( size_t i = 0, n = maList.size(); i < n; ++i )
571 if ( maList[ i ]->IsError() )
572 return sal_True;
573 return sal_False;
576 void ParserMessageList::clear()
578 for ( size_t i = 0, n = maList.size(); i < n; ++i )
579 delete maList[ i ];
580 maList.clear();
583 struct Tag
585 rtl::OUString GetName() const { return rtl::OUString::createFromAscii( pName ); };
586 const char* pName;
587 TokenId nTag;
591 static const Tag aKnownTags[] =
593 /* commenting oldstyle tags
594 // { "<#GROUP_FORMAT>", TAG_GROUP_FORMAT },
595 { "<#BOLD>", TAG_BOLDON },
596 { "<#/BOLD>", TAG_BOLDOFF },
597 { "<#ITALIC>", TAG_ITALICON },
598 { "<#/ITALIC>", TAG_ITALICOFF },
599 { "<#UNDER>", TAG_UNDERLINEON },
600 { "<#/UNDER>", TAG_UNDERLINEOFF },
602 // { "<#GROUP_NOTALLOWED>", TAG_GROUP_NOTALLOWED },
603 { "<#HELPID>", TAG_HELPID },
604 { "<#MODIFY>", TAG_MODIFY },
605 { "<#REFNR>", TAG_REFNR },
607 // { "<#GROUP_STRUCTURE>", TAG_GROUP_STRUCTURE },
608 { "<#NAME>", TAG_NAME },
609 { "<#HREF>", TAG_HREF },
610 { "<#AVIS>", TAG_AVIS },
611 { "<#AHID>", TAG_AHID },
612 { "<#AEND>", TAG_AEND },
614 { "<#TITEL>", TAG_TITEL },
615 { "<#KEY>", TAG_KEY },
616 { "<#INDEX>", TAG_INDEX },
618 { "<#REFSTART>", TAG_REFSTART },
620 { "<#GRAPHIC>", TAG_GRAPHIC },
621 { "<#NEXTVERSION>", TAG_NEXTVERSION },
623 // { "<#GROUP_SYSSWITCH>", TAG_GROUP_SYSSWITCH },
624 { "<#WIN>", TAG_WIN },
625 { "<#UNIX>", TAG_UNIX },
626 { "<#MAC>", TAG_MAC },
627 { "<#OS2>", TAG_OS2 },
629 // { "<#GROUP_PROGSWITCH>", TAG_GROUP_PROGSWITCH },
630 { "<#WRITER>", TAG_WRITER },
631 { "<#CALC>", TAG_CALC },
632 { "<#DRAW>", TAG_DRAW },
633 { "<#IMPRESS>", TAG_IMPRESS },
634 { "<#SCHEDULE>", TAG_SCHEDULE },
635 { "<#IMAGE>", TAG_IMAGE },
636 { "<#MATH>", TAG_MATH },
637 { "<#CHART>", TAG_CHART },
638 { "<#OFFICE>", TAG_OFFICE },
640 // { "<#TAG_GROUP_META>", TAG_GROUP_META },
641 { "$[officefullname]", TAG_OFFICEFULLNAME },
642 { "$[officename]", TAG_OFFICENAME },
643 { "$[officepath]", TAG_OFFICEPATH },
644 { "$[officeversion]", TAG_OFFICEVERSION },
645 { "$[portalname]", TAG_PORTALNAME },
646 { "$[portalfullname]", TAG_PORTALFULLNAME },
647 { "$[portalpath]", TAG_PORTALPATH },
648 { "$[portalversion]", TAG_PORTALVERSION },
649 { "$[portalshortname]", TAG_PORTALSHORTNAME },
650 /* commenting oldstyle tags
651 // { "<#TAG_GROUP_SINGLE>", TAG_GROUP_SINGLE },
652 { "<#REFINSERT>", TAG_REFINSERT },
654 // { "<#GROUP_MULTI>", TAG_GROUP_MULTI },
655 { "<#END>", TAG_END },
656 { "<#ELSE>", TAG_ELSE },
657 { "<#VERSIONEND>", TAG_VERSIONEND },
658 { "<#ENDGRAPHIC>", TAG_ENDGRAPHIC },*/
659 { "<Common Tag>", TAG_COMMONSTART },
660 { "</Common Tag>", TAG_COMMONEND },
662 { "<no more tags>", TAG_NOMORETAGS },
663 { "", TAG_UNKNOWN_TAG },
667 SimpleParser::SimpleParser()
668 : nPos( 0 )
669 , aNextTag( TAG_NOMORETAGS, TOK_INVALIDPOS )
673 void SimpleParser::Parse( rtl::OUString const & PaSource )
675 aSource = PaSource;
676 nPos = 0;
677 aLastToken = rtl::OUString();
678 aNextTag = TokenInfo( TAG_NOMORETAGS, TOK_INVALIDPOS );
679 aTokenList.clear();
682 TokenInfo SimpleParser::GetNextToken( ParserMessageList &rErrorList )
684 TokenInfo aResult;
685 sal_Int32 nTokenStartPos = 0;
686 if ( aNextTag.nId != TAG_NOMORETAGS )
688 aResult = aNextTag;
689 aNextTag = TokenInfo( TAG_NOMORETAGS, TOK_INVALIDPOS );
691 else
693 aLastToken = GetNextTokenString( rErrorList, nTokenStartPos );
694 if ( aLastToken.isEmpty() )
695 return TokenInfo( TAG_NOMORETAGS, TOK_INVALIDPOS );
697 // do we have a \< ... \> style tag?
698 if (aLastToken.matchAsciiL(RTL_CONSTASCII_STRINGPARAM("\\<")))
700 // check for paired \" \"
701 bool bEven = true;
702 sal_Int32 nQuotePos = 0;
703 sal_Int32 nQuotedQuotesPos =
704 aLastToken.indexOfAsciiL(RTL_CONSTASCII_STRINGPARAM("\\\""));
705 sal_Int32 nQuotedBackPos = aLastToken.indexOfAsciiL(
706 RTL_CONSTASCII_STRINGPARAM("\\\\"));
707 // this is only to kick out quoted backslashes
708 while (nQuotedQuotesPos != -1)
710 if ( nQuotedBackPos != -1 && nQuotedBackPos <= nQuotedQuotesPos )
711 nQuotePos = nQuotedBackPos+2;
712 else
714 nQuotePos = nQuotedQuotesPos+2;
715 bEven = !bEven;
717 nQuotedQuotesPos = aLastToken.indexOfAsciiL(
718 RTL_CONSTASCII_STRINGPARAM("\\\""), nQuotePos);
719 nQuotedBackPos = aLastToken.indexOfAsciiL(
720 RTL_CONSTASCII_STRINGPARAM("\\\\"), nQuotePos);
721 // this is only to kick out quoted backslashes
723 if ( !bEven )
725 rErrorList.AddError( 24, "Missing quotes ( \\\" ) in Tag", TokenInfo( TAG_UNKNOWN_TAG, nTokenStartPos, aLastToken ) );
728 // check if we have an end-tag or a start-tag
729 sal_Int32 nNonBlankStartPos = 2;
730 while (aLastToken[nNonBlankStartPos] == ' ')
731 nNonBlankStartPos++;
732 if (aLastToken[nNonBlankStartPos] == '/')
733 aResult = TokenInfo( TAG_COMMONEND, nTokenStartPos, aLastToken, rErrorList );
734 else
736 aResult = TokenInfo( TAG_COMMONSTART, nTokenStartPos, aLastToken, rErrorList );
737 sal_Int32 nNonBlankEndPos = aLastToken.getLength() - 3;
738 while (aLastToken[nNonBlankEndPos] == ' ')
739 nNonBlankEndPos--;
740 if (aLastToken[nNonBlankEndPos] == '/')
741 aNextTag = TokenInfo( TAG_COMMONEND, nTokenStartPos, rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("\\</")) + aResult.GetTagName() + rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("\\>")), rErrorList );
744 else
746 sal_Int32 i = 0;
747 while ( aKnownTags[i].nTag != TAG_UNKNOWN_TAG &&
748 aLastToken != aKnownTags[i].GetName() )
749 i++;
750 aResult = TokenInfo( aKnownTags[i].nTag, nTokenStartPos );
754 if ( aResult.nId == TAG_UNKNOWN_TAG )
755 aResult = TokenInfo( TAG_UNKNOWN_TAG, nTokenStartPos, aLastToken );
756 aTokenList.insert( aResult );
757 return aResult;
760 rtl::OUString SimpleParser::GetNextTokenString( ParserMessageList &rErrorList, sal_Int32 &rTagStartPos )
762 sal_Int32 nStyle2StartPos = aSource.indexOfAsciiL(
763 RTL_CONSTASCII_STRINGPARAM("$["), nPos );
764 sal_Int32 nStyle3StartPos = aSource.indexOfAsciiL(
765 RTL_CONSTASCII_STRINGPARAM("\\<"), nPos);
766 sal_Int32 nStyle4StartPos = aSource.indexOfAsciiL(
767 RTL_CONSTASCII_STRINGPARAM("\\\\"), nPos);
768 // this is only to kick out quoted backslashes
770 rTagStartPos = 0;
772 if (nStyle2StartPos == -1 && nStyle3StartPos == -1)
773 return rtl::OUString(); // no more tokens
775 if ( nStyle4StartPos != -1
776 && (nStyle2StartPos == -1 || nStyle4StartPos < nStyle2StartPos)
777 && (nStyle3StartPos == -1 || nStyle4StartPos < nStyle3StartPos ) )
778 // to make sure \\ is always handled first
779 { // Skip quoted Backslash
780 nPos = nStyle4StartPos +2;
781 return GetNextTokenString( rErrorList, rTagStartPos );
784 if ( nStyle2StartPos != -1 && ( nStyle3StartPos == -1 || nStyle2StartPos < nStyle3StartPos ) )
785 { // test for $[ ... ] style tokens
786 sal_Int32 nEndPos = aSource.indexOf(']', nStyle2StartPos);
787 if (nEndPos == -1)
788 { // Token is incomplete. Skip start and search for better ones
789 nPos = nStyle2StartPos +2;
790 return GetNextTokenString( rErrorList, rTagStartPos );
792 nPos = nEndPos;
793 rTagStartPos = nStyle2StartPos;
794 return aSource.copy(nStyle2StartPos, nEndPos - nStyle2StartPos + 1);
796 else
797 { // test for \< ... \> style tokens
798 sal_Int32 nEndPos = aSource.indexOfAsciiL(
799 RTL_CONSTASCII_STRINGPARAM("\\>"), nStyle3StartPos);
800 sal_Int32 nQuotedBackPos = aSource.indexOfAsciiL(
801 RTL_CONSTASCII_STRINGPARAM("\\\\"), nStyle3StartPos);
802 // this is only to kick out quoted backslashes
803 while (nQuotedBackPos <= nEndPos && nQuotedBackPos != -1)
805 nEndPos = aSource.indexOfAsciiL(
806 RTL_CONSTASCII_STRINGPARAM("\\>"), nQuotedBackPos + 2);
807 nQuotedBackPos = aSource.indexOfAsciiL(
808 RTL_CONSTASCII_STRINGPARAM("\\\\"), nQuotedBackPos + 2);
809 // this is only to kick out quoted backslashes
811 if (nEndPos == -1)
812 { // Token is incomplete. Skip start and search for better ones
813 nPos = nStyle3StartPos +2;
814 rErrorList.AddError( 24, "Tag Start '\\<' without Tag End '\\>'", TokenInfo( TAG_UNKNOWN_TAG, nStyle3StartPos, helper::abbreviate(aSource, nStyle3StartPos - 10, 20) ) );
815 return GetNextTokenString( rErrorList, rTagStartPos );
817 // check for paired quoted " --> \"sometext\"
819 nPos = nEndPos;
820 rTagStartPos = nStyle3StartPos;
821 return aSource.copy(nStyle3StartPos, nEndPos-nStyle3StartPos + 2);
825 rtl::OUString SimpleParser::GetLexem( TokenInfo const &aToken )
827 if ( !aToken.aTokenString.isEmpty() )
828 return aToken.aTokenString;
829 else
831 sal_Int32 i = 0;
832 while ( aKnownTags[i].nTag != TAG_UNKNOWN_TAG &&
833 aKnownTags[i].nTag != aToken.nId )
834 i++;
836 return aKnownTags[i].GetName();
840 TokenParser::TokenParser()
841 : pErrorList( NULL )
844 void TokenParser::Parse( const rtl::OUString &aCode, ParserMessageList* pList )
846 pErrorList = pList;
848 //Scanner initialisieren
849 aParser.Parse( aCode );
851 //erstes Symbol holen
852 aTag = aParser.GetNextToken( *pErrorList );
854 nPfCaseOptions = 0;
855 nAppCaseOptions = 0;
856 bPfCaseActive = sal_False;
857 bAppCaseActive = sal_False;
859 nActiveRefTypes = 0;
861 //Ausfuehren der Start-Produktion
862 Paragraph();
864 //Es wurde nicht die ganze Kette abgearbeitet, bisher ist aber
865 //kein Fehler aufgetreten
866 //=> es wurde ein einleitendes Tag vergessen
867 if ( aTag.nId != TAG_NOMORETAGS )
869 switch ( aTag.nId )
871 case TAG_END:
873 ParseError( 3, "Extra Tag <#END>. Switch or <#HREF> expected.", aTag );
875 break;
876 case TAG_BOLDOFF:
878 ParseError( 4, "<#BOLD> expected before <#/BOLD>.", aTag );
880 break;
881 case TAG_ITALICOFF:
883 ParseError( 5, "<#ITALIC> expected before <#/ITALIC>.", aTag );
885 break;
886 case TAG_UNDERLINEOFF:
888 ParseError( 17, "<#UNDER> expected before <#/UNDER>.", aTag );
890 break;
891 case TAG_AEND:
893 ParseError( 5, "Extra Tag <#AEND>. <#AVIS> or <#AHID> expected.", aTag );
895 break;
896 case TAG_ELSE:
898 ParseError( 16, "Application-tag or platform-tag expected before <#ELSE>.", aTag );
900 break;
901 case TAG_UNKNOWN_TAG:
903 ParseError( 6, "unknown Tag", aTag );
905 break;
906 default:
908 ParseError( 6, "unexpected Tag", aTag );
912 pErrorList = NULL;
915 void TokenParser::Paragraph()
917 switch ( aTag.nId )
919 case TAG_GRAPHIC:
920 case TAG_NEXTVERSION:
922 TagRef();
923 Paragraph();
925 break;
926 case TAG_AVIS:
927 case TAG_AHID:
929 TagRef();
930 Paragraph();
932 break;
933 case TAG_HELPID:
935 SimpleTag();
936 Paragraph();
938 break;
939 case TAG_OFFICEFULLNAME:
940 case TAG_OFFICENAME:
941 case TAG_OFFICEPATH:
942 case TAG_OFFICEVERSION:
943 case TAG_PORTALNAME:
944 case TAG_PORTALFULLNAME:
945 case TAG_PORTALPATH:
946 case TAG_PORTALVERSION:
947 case TAG_PORTALSHORTNAME:
949 SimpleTag();
950 Paragraph();
952 break;
953 case TAG_REFINSERT:
955 SimpleTag();
956 Paragraph();
958 break;
959 case TAG_BOLDON:
960 case TAG_ITALICON:
961 case TAG_UNDERLINEON:
962 case TAG_COMMONSTART:
964 TagPair();
965 Paragraph();
967 break;
968 case TAG_HREF:
969 case TAG_NAME:
970 case TAG_KEY:
971 case TAG_INDEX:
972 case TAG_TITEL:
973 case TAG_REFSTART:
975 TagRef();
976 Paragraph();
978 break;
979 case TAG_WIN:
980 case TAG_UNIX:
981 case TAG_MAC: //...
983 if ( ! bPfCaseActive )
985 //PfCases duerfen nicht verschachtelt sein:
986 bPfCaseActive = sal_True;
987 PfCase();
989 //So jetzt kann wieder ein PfCase kommen:
990 bPfCaseActive = sal_False;
991 Paragraph();
994 break;
995 case TAG_WRITER:
996 case TAG_CALC:
997 case TAG_DRAW:
998 case TAG_IMPRESS:
999 case TAG_SCHEDULE:
1000 case TAG_IMAGE:
1001 case TAG_MATH:
1002 case TAG_CHART:
1003 case TAG_OFFICE:
1005 if ( !bAppCaseActive )
1007 //AppCases duerfen nicht verschachtelt sein:
1008 bAppCaseActive = sal_True;
1009 AppCase();
1011 //jetzt koennen wieder AppCases kommen:
1012 bAppCaseActive = sal_False;
1013 Paragraph();
1016 break;
1018 //Case TAG_BOLDOFF, TAG_ITALICOFF, TAG_BUNDERLINE, TAG_END
1019 //nichts tun wg. epsilon-Prod.
1023 void TokenParser::PfCase()
1026 //Produktion:
1027 //PfCase -> PfCaseBegin Paragraph (PfCase | PfCaseEnd)
1029 PfCaseBegin();
1031 //Jetzt ist eine PfCase-Produktion aktiv:
1032 Paragraph();
1033 switch ( aTag.nId )
1035 case TAG_ELSE:
1036 case TAG_END:
1038 CaseEnd();
1040 break;
1041 case TAG_WIN:
1042 case TAG_UNIX:
1043 case TAG_MAC: //First (PfBegin)
1045 PfCase();
1047 break;
1048 default:
1049 ParseError( 8, "<#ELSE> or <#END> or platform-tag expected.", aTag );
1051 //Die gemerkten Tags wieder loeschen fuer naechstes PfCase:
1052 nPfCaseOptions = 0;
1055 void TokenParser::PfCaseBegin()
1057 switch ( aTag.nId )
1059 case TAG_WIN:
1060 case TAG_UNIX:
1061 case TAG_MAC:
1063 //Token darf noch nicht vorgekommen sein im
1064 //aktuellen Plattform-Case:
1065 if ( !HAS_FLAG( nPfCaseOptions, TAG_NOGROUP( aTag.nId ) ) )
1067 SET_FLAG( nPfCaseOptions, TAG_NOGROUP( aTag.nId ) );
1068 match( aTag, aTag );
1070 else {
1071 ParseError( 9, "Tag defined twice in the same platform-case", aTag );
1077 void TokenParser::AppCase()
1080 //Produktion:
1081 //AppCase -> AppCaseBegin Paragraph (AppCase | AppCaseEnd)
1084 AppCaseBegin();
1086 Paragraph();
1088 switch ( aTag.nId )
1090 case TAG_ELSE:
1091 case TAG_END:
1093 CaseEnd();
1095 break;
1096 case TAG_WRITER:
1097 case TAG_DRAW:
1098 case TAG_CALC:
1099 case TAG_IMAGE:
1100 case TAG_MATH:
1101 case TAG_CHART:
1102 case TAG_OFFICE:
1103 case TAG_IMPRESS:
1104 case TAG_SCHEDULE: //First (AppBegin)
1106 AppCase();
1108 break;
1109 default:
1110 ParseError( 1, "<#ELSE> or <#END> or application-case-tag expected.", aTag );
1113 //Die gemerkten Tags wieder loeschen fuer naechstes AppCase:
1114 nAppCaseOptions = 0;
1117 void TokenParser::AppCaseBegin()
1119 switch ( aTag.nId )
1121 case TAG_WRITER:
1122 case TAG_DRAW:
1123 case TAG_CALC:
1124 case TAG_IMAGE:
1125 case TAG_MATH:
1126 case TAG_CHART:
1127 case TAG_OFFICE:
1128 case TAG_IMPRESS:
1129 case TAG_SCHEDULE:
1131 //Token darf noch nicht vorgekommen sein im
1132 //aktuellen Plattform-Case:
1133 if ( !HAS_FLAG( nAppCaseOptions, TAG_NOGROUP( aTag.nId ) ) )
1135 SET_FLAG( nAppCaseOptions, TAG_NOGROUP( aTag.nId ) );
1136 match( aTag, aTag );
1138 else {
1139 ParseError( 13, "Tag defined twice in the same application-case.", aTag );
1145 void TokenParser::CaseEnd()
1147 //Produktion:
1148 //CaseEnd -> <#ELSE> Paragraph <#END> | <#END>
1150 switch ( aTag.nId )
1152 case TAG_ELSE:
1154 match( aTag, TAG_ELSE );
1155 Paragraph();
1156 match( aTag, TAG_END );
1158 break;
1159 case TAG_END:
1161 match( aTag, TAG_END );
1163 break;
1164 default:
1165 ParseError( 2, "<#ELSE> or <#END> expected.", aTag );
1169 void TokenParser::SimpleTag()
1172 switch ( aTag.nId )
1174 case TAG_HELPID:
1176 match( aTag, TAG_HELPID );
1178 break;
1179 case TAG_OFFICEFULLNAME:
1180 case TAG_OFFICENAME:
1181 case TAG_OFFICEPATH:
1182 case TAG_OFFICEVERSION:
1183 case TAG_PORTALNAME:
1184 case TAG_PORTALFULLNAME:
1185 case TAG_PORTALPATH:
1186 case TAG_PORTALVERSION:
1187 case TAG_PORTALSHORTNAME:
1189 case TAG_REFINSERT:
1191 match( aTag, aTag );
1193 break;
1194 default:
1195 ParseError( 15, "[<#SimpleTag>] expected.", aTag );
1199 void TokenParser::TagPair()
1201 switch ( aTag.nId )
1203 case TAG_BOLDON:
1205 match( aTag, TAG_BOLDON );
1206 Paragraph();
1207 match( aTag, TAG_BOLDOFF );
1209 break;
1210 case TAG_ITALICON:
1212 match( aTag, TAG_ITALICON );
1213 Paragraph();
1214 match( aTag, TAG_ITALICOFF );
1216 break;
1217 case TAG_UNDERLINEON:
1219 match( aTag, TAG_UNDERLINEON );
1220 Paragraph();
1221 match( aTag, TAG_UNDERLINEOFF );
1223 break;
1224 case TAG_COMMONSTART:
1226 //remember tag so we can give the original tag in case of an error
1227 TokenInfo aEndTag( aTag );
1228 aEndTag.nId = TAG_COMMONEND;
1229 match( aTag, TAG_COMMONSTART );
1230 Paragraph();
1231 match( aTag, aEndTag );
1233 break;
1234 default:
1235 ParseError( 10, "<#BOLD>, <#ITALIC>, <#UNDER> expected.", aTag );
1240 void TokenParser::TagRef()
1242 switch ( aTag.nId )
1244 case TAG_GRAPHIC:
1245 case TAG_NEXTVERSION:
1247 if ( !HAS_FLAG( nActiveRefTypes, TAG_NOGROUP( aTag.nId ) ) )
1249 TokenId aThisToken = aTag.nId;
1250 SET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) );
1251 match( aTag, aTag );
1252 Paragraph();
1253 if ( aThisToken == TAG_GRAPHIC )
1254 match( aTag, TAG_ENDGRAPHIC );
1255 else
1256 match( aTag, TAG_VERSIONEND );
1257 // don't reset since alowed only once per paragraph
1258 // RESET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) );
1260 else
1262 ParseError( 11, "Tags <#GRAPHIC>,<#NEXTVERSION> allowed only once per paragraph at", aTag );
1265 break;
1266 case TAG_AVIS:
1267 case TAG_AHID:
1269 if ( !HAS_FLAG( nActiveRefTypes, TAG_NOGROUP( aTag.nId ) ) )
1271 TokenId aThisToken = aTag.nId;
1272 SET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) );
1273 match( aTag, aTag );
1274 Paragraph();
1275 match( aTag, TAG_AEND );
1276 RESET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) );
1278 else
1280 ParseError( 11, "Nested <#AHID>,<#AVIS> not allowed.", aTag );
1283 break;
1284 case TAG_HREF:
1285 case TAG_NAME:
1289 // NOBREAK
1290 case TAG_KEY:
1291 case TAG_INDEX:
1292 case TAG_TITEL:
1293 case TAG_REFSTART:
1295 if ( !HAS_FLAG( nActiveRefTypes, TAG_NOGROUP( aTag.nId ) ) )
1297 TokenId aThisToken = aTag.nId;
1298 match( aTag, aTag );
1299 if ( aThisToken != TAG_NAME )
1300 { // TAG_NAME has no TAG_END
1301 SET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) );
1302 Paragraph();
1303 match( aTag, TAG_END );
1304 RESET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) );
1307 else
1309 ParseError( 11, "Nested <#HREF>,<#NAME> or <#KEY> not allowed.", aTag );
1312 break;
1313 default:
1314 ParseError( 12, "<#HREF>,<#NAME> or <#KEY> expected.", aTag );
1318 sal_Bool TokenParser::match( const TokenInfo &aCurrentToken, const TokenId &aExpectedToken )
1320 return match( aCurrentToken, TokenInfo( aExpectedToken, TOK_INVALIDPOS ) );
1323 sal_Bool TokenParser::match( const TokenInfo &aCurrentToken, const TokenInfo &rExpectedToken )
1325 TokenInfo aExpectedToken( rExpectedToken );
1326 if ( aCurrentToken.nId == aExpectedToken.nId )
1328 if ( ( aCurrentToken.nId == TAG_COMMONEND
1329 && aCurrentToken.GetTagName() == aExpectedToken.GetTagName() )
1330 || aCurrentToken.nId != TAG_COMMONEND )
1332 aTag = aParser.GetNextToken( *pErrorList );
1333 return sal_True;
1337 if ( aExpectedToken.nId == TAG_COMMONEND )
1339 aExpectedToken.aTokenString =
1340 rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("Close tag for "))
1341 + aExpectedToken.aTokenString;
1344 rtl::OString sTmp(RTL_CONSTASCII_STRINGPARAM("Expected Symbol"));
1345 if ( aCurrentToken.nId == TAG_NOMORETAGS )
1347 ParseError( 7, sTmp, aExpectedToken );
1349 else
1351 rtl::OStringBuffer aBuf(sTmp);
1352 aBuf.append(": ").
1353 append(rtl::OUStringToOString(aParser.GetLexem( aExpectedToken ), RTL_TEXTENCODING_UTF8)).
1354 append(RTL_CONSTASCII_STRINGPARAM(" near "));
1355 ParseError( 7, aBuf.makeStringAndClear(), aCurrentToken );
1357 return sal_False;
1360 void TokenParser::ParseError( sal_Int32 nErrNr, const rtl::OString &rErrMsg, const TokenInfo &rTag )
1362 pErrorList->AddError( nErrNr, rErrMsg, rTag);
1364 // Das Fehlerhafte Tag ueberspringen
1365 aTag = aParser.GetNextToken( *pErrorList );
1369 ParserMessage::ParserMessage( sal_Int32 PnErrorNr, const rtl::OString &rPaErrorText, const TokenInfo &rTag )
1370 : nErrorNr( PnErrorNr )
1371 , nTagBegin( 0 )
1372 , nTagLength( 0 )
1374 rtl::OUString aLexem( SimpleParser::GetLexem( rTag ) );
1375 rtl::OStringBuffer aErrorBuffer(rPaErrorText);
1376 aErrorBuffer.append(RTL_CONSTASCII_STRINGPARAM(": "));
1377 aErrorBuffer.append(rtl::OUStringToOString(aLexem, RTL_TEXTENCODING_UTF8));
1378 if ( rTag.nId == TAG_NOMORETAGS )
1379 aErrorBuffer.append(RTL_CONSTASCII_STRINGPARAM(" at end of line "));
1380 else if ( rTag.nPos != TOK_INVALIDPOS )
1382 aErrorBuffer.append(RTL_CONSTASCII_STRINGPARAM(" at Position "));
1383 aErrorBuffer.append(static_cast<sal_Int32>(rTag.nPos));
1385 aErrorText = aErrorBuffer.makeStringAndClear();
1386 nTagBegin = rTag.nPos;
1387 nTagLength = aLexem.getLength();
1390 ParserError::ParserError( sal_Int32 ErrorNr, const rtl::OString &rErrorText, const TokenInfo &rTag )
1391 : ParserMessage( ErrorNr, rErrorText, rTag )
1394 ParserWarning::ParserWarning( sal_Int32 ErrorNr, const rtl::OString &rErrorText, const TokenInfo &rTag )
1395 : ParserMessage( ErrorNr, rErrorText, rTag )
1398 sal_Bool LingTest::IsTagMandatory( TokenInfo const &aToken, TokenId &aMetaTokens )
1400 TokenId aTokenId = aToken.nId;
1401 TokenId aTokenGroup = TAG_GROUP( aTokenId );
1402 if ( TAG_GROUP_PROGSWITCH == aTokenGroup
1403 || TAG_REFINSERT == aTokenId
1404 || TAG_REFSTART == aTokenId
1405 || TAG_NAME == aTokenId
1406 || TAG_HREF == aTokenId
1407 || TAG_AVIS == aTokenId
1408 || TAG_AHID == aTokenId
1409 || TAG_GRAPHIC == aTokenId
1410 || TAG_NEXTVERSION == aTokenId
1411 || ( TAG_GROUP_META == aTokenGroup && (aMetaTokens & aTokenId) == aTokenId ) )
1413 if ( TAG_GROUP_META == aTokenGroup )
1414 aMetaTokens |= aTokenId;
1415 return sal_True;
1417 else if ( TAG_COMMONSTART == aTokenId
1418 || TAG_COMMONEND == aTokenId )
1420 rtl::OUString aTagName = aToken.GetTagName();
1421 return !(aTagName.equalsIgnoreAsciiCaseAsciiL(RTL_CONSTASCII_STRINGPARAM("comment"))
1422 || aTagName.equalsIgnoreAsciiCaseAsciiL(RTL_CONSTASCII_STRINGPARAM("bookmark_value"))
1423 || aTagName.equalsIgnoreAsciiCaseAsciiL(RTL_CONSTASCII_STRINGPARAM("emph"))
1424 || aTagName.equalsIgnoreAsciiCaseAsciiL(RTL_CONSTASCII_STRINGPARAM("item"))
1425 || aTagName.equalsIgnoreAsciiCaseAsciiL(RTL_CONSTASCII_STRINGPARAM("br")) );
1427 return sal_False;
1430 void LingTest::CheckTags( TokenList &aReference, TokenList &aTestee, sal_Bool bFixTags )
1432 size_t i=0,j=0;
1433 // Clean old Warnings
1434 aCompareWarningList.clear();
1436 /* in xml tags, do not require the following tags
1437 comment
1438 bookmark_value
1439 emph
1440 item
1444 // filter uninteresting Tags
1445 TokenId aMetaTokens = 0;
1446 for ( i=0 ; i < aReference.size() ; i++ )
1448 if ( !IsTagMandatory( aReference[ i ], aMetaTokens ) )
1449 aReference[ i ].SetDone();
1452 aMetaTokens = 0;
1453 for ( i=0 ; i < aTestee.size() ; i++ )
1455 if ( !IsTagMandatory( aTestee[ i ], aMetaTokens ) )
1456 aTestee[ i ].SetDone();
1459 // remove all matching tags
1460 for ( i=0 ; i < aReference.size() ; i++ )
1462 if ( aReference[ i ].IsDone() )
1463 continue;
1465 sal_Bool bTagFound = sal_False;
1466 for ( j=0 ; j < aTestee.size() && !bTagFound ; j++ )
1468 if ( aTestee[ j ].IsDone() )
1469 continue;
1471 if ( aReference[ i ].MatchesTranslation( aTestee[ j ], sal_False, aCompareWarningList ) )
1473 aReference[ i ].SetDone();
1474 aTestee[ j ].SetDone();
1475 bTagFound = sal_True;
1480 sal_Bool bCanFix = sal_True;
1482 if ( bFixTags )
1484 // we fix only if its a really simple case
1485 sal_Int32 nTagCount = 0;
1486 for ( i=0 ; i < aReference.size() ; i++ )
1487 if ( !aReference[ i ].IsDone() )
1488 nTagCount++;
1489 if ( nTagCount > 1 )
1490 bCanFix = sal_False;
1492 nTagCount = 0;
1493 for ( i=0 ; i < aTestee.size() ; i++ )
1494 if ( !aTestee[ i ].IsDone() )
1495 nTagCount++;
1496 if ( nTagCount > 1 )
1497 bCanFix = sal_False;
1500 // generate errors for tags that have differing attributes
1501 for ( i=0 ; i < aReference.size() ; i++ )
1503 if ( aReference[ i ].IsDone() )
1504 continue;
1506 sal_Bool bTagFound = sal_False;
1507 for ( j=0 ; j < aTestee.size() && !bTagFound ; j++ )
1509 if ( aTestee[ j ].IsDone() )
1510 continue;
1512 if ( aReference[ i ].MatchesTranslation( aTestee[ j ], sal_True, aCompareWarningList, bCanFix && bFixTags ) )
1514 aReference[ i ].SetDone();
1515 aTestee[ j ].SetDone();
1516 bTagFound = sal_True;
1521 // list remaining tags as errors
1522 for ( i=0 ; i < aReference.size() ; i++ )
1524 if ( aReference[ i ].IsDone() )
1525 continue;
1527 aCompareWarningList.AddError( 20, "Missing Tag in Translation", aReference[ i ] );
1529 for ( i=0 ; i < aTestee.size() ; i++ )
1531 if ( aTestee[ i ].IsDone() )
1532 continue;
1534 aCompareWarningList.AddError( 21, "Extra Tag in Translation", aTestee[ i ] );
1537 for ( i=0 ; i < aReference.size() ; i++ )
1538 aReference[ i ].SetDone( sal_False );
1540 for ( i=0 ; i < aTestee.size() ; i++ )
1541 aTestee[ i ].SetDone( sal_False );
1544 void LingTest::CheckReference( GSILine *aReference )
1546 aReferenceParser.Parse( aReference->GetUText(), aReference->GetMessageList() );
1549 void LingTest::CheckTestee( GSILine *aTestee, sal_Bool bHasSourceLine, sal_Bool bFixTags )
1551 aFixedTestee = aTestee->GetUText();
1552 aTesteeParser.Parse( aFixedTestee, aTestee->GetMessageList() );
1554 if ( bHasSourceLine )
1555 CheckTags( aReferenceParser.GetTokenList(), aTesteeParser.GetTokenList(), bFixTags );
1557 if ( bFixTags )
1559 TokenList& aTesteeTokens = aTesteeParser.GetTokenList();
1560 sal_Bool bFixesDone = sal_False;
1561 // count backwards to allow replacing from right to left
1562 int i;
1563 for ( i = aTesteeTokens.size() ; i > 0 ; )
1565 if ( aTesteeTokens[ --i ].HasBeenFixed() )
1567 bFixesDone = sal_True;
1568 aFixedTestee = aFixedTestee.replaceAt( aTesteeTokens[ i ].nPos, aTesteeTokens[ i ].aTokenString.getLength(), aTesteeTokens[ i ].MakeTag() );
1571 if ( bFixesDone )
1573 aTestee->SetUText( aFixedTestee );
1574 aTestee->SetFixed();
1579 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */