1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: tagtest.cxx,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 // MARKER(update_precomp.py): autogen include statement, do not remove
32 #include "precompiled_transex3.hxx"
33 #include <tools/string.hxx>
34 #include "tagtest.hxx"
36 #if OSL_DEBUG_LEVEL > 1
40 #include "gsicheck.hxx"
// Bit-flag helper macros: test, set and clear the bits of nFlag within nFlags.
42 #define HAS_FLAG( nFlags, nFlag ) ( ( nFlags & nFlag ) != 0 )
43 #define SET_FLAG( nFlags, nFlag ) ( nFlags |= nFlag )
44 #define RESET_FLAG( nFlags, nFlag ) ( nFlags &= ~nFlag ) // ~ = bitwise NOT
// Constructs a token from an id (pnId), a USHORT argument nP (presumably the
// token position - TODO confirm) and the token text paStr.
// The token starts out as "not fixed". For common start/end tags the text is
// split into its parts right away via SplitTag(), which reports problems
// into rErrorList.
48 TokenInfo::TokenInfo( TokenId pnId
, USHORT nP
, String paStr
, ParserMessageList
&rErrorList
)
52 , bHasBeenFixed(FALSE
)
54 , aTokenString( paStr
)
// only \< ... \> style (common) tags carry a name and properties to split
58 if ( nId
== TAG_COMMONSTART
|| nId
== TAG_COMMONEND
)
59 SplitTag( rErrorList
);
// States of the state machine that TokenInfo::SplitTag() runs over a tag.
// TC_START is the initial state; TC_FINISHED and TC_ERROR end the scan.
62 enum tagcheck
{ TC_START
, TC_HAS_TAG_NAME
, TC_HAS_PROP_NAME_EQ
, TC_HAS_PROP_NAME_EQ_SP
, TC_HAS_PROP_NAME_SP
, TC_INSIDE_STRING
, TC_PROP_FINISHED
, TC_CLOSED
, TC_CLOSED_SPACE
, TC_CLOSETAG
, TC_CLOSETAG_HAS_TAG_NAME
, TC_FINISHED
, TC_ERROR
};
65 \< link href = \"text\" name = \"C\" \>
66 START ' ' -> HAS_TAG_NAME
68 START '/' -> CLOSETAG - no Portion (starting with /)
70 HAS_TAG_NAME '=' -> HAS_PROP_NAME_EQ
71 HAS_TAG_NAME ' ' -> HAS_PROP_NAME_SP
72 HAS_TAG_NAME '/' -> CLOSED
73 HAS_TAG_NAME '>' -> FINISHED
74 HAS_PROP_NAME_SP '=' -> HAS_PROP_NAME_EQ
75 HAS_PROP_NAME_EQ ' ' -> HAS_PROP_NAME_EQ_SP
76 HAS_PROP_NAME_EQ '"' -> INSIDE_STRING
77 HAS_PROP_NAME_EQ_SP '"' -> INSIDE_STRING
78 INSIDE_STRING ' ' -> INSIDE_STRING
79 INSIDE_STRING '=' -> INSIDE_STRING
80 INSIDE_STRING '>' -> INSIDE_STRING
81 INSIDE_STRING '"' -> PROP_FINISHED
82 PROP_FINISHED ' ' -> HAS_TAG_NAME
83 PROP_FINISHED '/' -> CLOSED
84 PROP_FINISHED '>' -> FINISHED
85 CLOSED ' ' -> CLOSED_SPACE
86 CLOSED '>' -> FINISHED
87 CLOSED_SPACE '>' -> FINISHED
89 CLOSETAG ' ' -> CLOSETAG_HAS_TAG_NAME
90 CLOSETAG '>' -> FINISHED
91 CLOSETAG_HAS_TAG_NAME '>' -> FINISHED
// Splits the text of a \< ... \> tag (aTokenString) into portions at the
// delimiters blank, backslash, '=', '>' and '/' and runs the tagcheck state
// machine over them, starting in TC_START.
// Property names and values found on the way are stored in aProperties.
// Problems are reported to rErrorList: error 25 for malformed tags or
// properties, error 99 for an unexpected internal state.
94 void TokenInfo::SplitTag( ParserMessageList
&rErrorList
)
96 USHORT nLastPos
= 2; // skip initial \<
97 USHORT nCheckPos
= nLastPos
;
98 String
aDelims( String::CreateFromAscii( " \\=>/" ) );
100 String aValue
; // store the value of a property
101 ByteString aName
; // store the name of a property/tag
102 BOOL bCheckName
= FALSE
;
103 BOOL bCheckEmpty
= FALSE
;
105 tagcheck aState
= TC_START
;
// skip blanks following the initial \<
108 while ( nLastPos
< aTokenString
.Len() && aTokenString
.GetChar( nLastPos
) == ' ')
111 nCheckPos
= aTokenString
.SearchChar( aDelims
.GetBuffer(), nLastPos
);
// process one delimiter per iteration until the tag is finished, an error
// state is reached or no further delimiter is found
112 while ( nCheckPos
!= STRING_NOTFOUND
&& !( aState
== TC_FINISHED
|| aState
== TC_ERROR
) )
// the text between the previous position and the delimiter just found
114 aPortion
= aTokenString
.Copy( nLastPos
, nCheckPos
-nLastPos
);
116 if ( aTokenString
.GetChar( nCheckPos
) == '\\' )
119 cDelim
= aTokenString
.GetChar( nCheckPos
);
124 // START ' ' -> HAS_TAG_NAME
125 // START '/' -> CLOSED
126 // START '>' -> FINISHED
131 case ' ': aState
= TC_HAS_TAG_NAME
;
136 if ( aPortion
.Len() == 0 )
138 aState
= TC_CLOSETAG
;
147 case '>': aState
= TC_FINISHED
;
150 default: aState
= TC_ERROR
;
154 // HAS_TAG_NAME '=' -> HAS_PROP_NAME_EQ
155 // HAS_TAG_NAME ' ' -> HAS_PROP_NAME_SP
156 // HAS_TAG_NAME '/' -> CLOSED
157 // HAS_TAG_NAME '>' -> FINISHED
158 case TC_HAS_TAG_NAME
:
161 case '=': aState
= TC_HAS_PROP_NAME_EQ
;
164 case ' ': aState
= TC_HAS_PROP_NAME_SP
;
167 case '/': aState
= TC_CLOSED
;
170 case '>': aState
= TC_FINISHED
;
173 default: aState
= TC_ERROR
;
177 // HAS_PROP_NAME_SP '=' -> HAS_PROP_NAME_EQ
178 case TC_HAS_PROP_NAME_SP
:
181 case '=': aState
= TC_HAS_PROP_NAME_EQ
;
184 default: aState
= TC_ERROR
;
188 // HAS_PROP_NAME_EQ ' ' -> HAS_PROP_NAME_EQ_SP
189 // HAS_PROP_NAME_EQ '"' -> INSIDE_STRING
190 case TC_HAS_PROP_NAME_EQ
:
193 case ' ': aState
= TC_HAS_PROP_NAME_EQ_SP
;
196 case '\"': aState
= TC_INSIDE_STRING
;
200 default: aState
= TC_ERROR
;
204 // HAS_PROP_NAME_EQ_SP '"' -> INSIDE_STRING
205 case TC_HAS_PROP_NAME_EQ_SP
:
208 case '\"': aState
= TC_INSIDE_STRING
;
212 default: aState
= TC_ERROR
;
216 // INSIDE_STRING * -> INSIDE_STRING
217 // INSIDE_STRING '"' -> PROP_FINISHED
218 case TC_INSIDE_STRING
:
223 aState
= TC_PROP_FINISHED
;
// a property may be defined only once per tag
225 if ( aProperties
.find( aName
) == aProperties
.end() )
227 if ( !IsPropertyValueValid( aName
, aValue
) )
229 rErrorList
.AddError( 25, ByteString("Property '").Append(aName
).Append("' has invalid value '").Append(ByteString( aValue
, RTL_TEXTENCODING_UTF8
)).Append("' "), *this );
232 aProperties
[ aName
] = aValue
;
236 rErrorList
.AddError( 25, ByteString("Property '").Append(aName
).Append("' defined twice "), *this );
243 aState
= TC_INSIDE_STRING
;
250 // PROP_FINISHED ' ' -> HAS_TAG_NAME
251 // PROP_FINISHED '/' -> CLOSED
252 // PROP_FINISHED '>' -> FINISHED
253 case TC_PROP_FINISHED
:
256 case ' ': aState
= TC_HAS_TAG_NAME
;
259 case '/': aState
= TC_CLOSED
;
262 case '>': aState
= TC_FINISHED
;
265 default: aState
= TC_ERROR
;
269 // CLOSED ' ' -> CLOSED_SPACE
270 // CLOSED '>' -> FINISHED
274 case ' ': aState
= TC_CLOSED_SPACE
;
278 case '>': aState
= TC_FINISHED
;
281 default: aState
= TC_ERROR
;
285 // CLOSED_SPACE '>' -> FINISHED
286 case TC_CLOSED_SPACE
:
289 case '>': aState
= TC_FINISHED
;
292 default: aState
= TC_ERROR
;
296 // CLOSETAG ' ' -> CLOSETAG_HAS_TAG_NAME
297 // CLOSETAG '>' -> FINISHED
302 case ' ': aState
= TC_CLOSETAG_HAS_TAG_NAME
;
306 case '>': aState
= TC_FINISHED
;
310 default: aState
= TC_ERROR
;
314 // CLOSETAG_HAS_TAG_NAME '>' -> FINISHED
315 case TC_CLOSETAG_HAS_TAG_NAME
:
318 case '>': aState
= TC_FINISHED
;
321 default: aState
= TC_ERROR
;
326 default: rErrorList
.AddError( 99, "Internal error Parsing Tag ", *this );
// name check: the portion must be non-empty ...
333 if ( aPortion
.Len() == 0 )
335 rErrorList
.AddError( 25, "Tag/Property name missing ", *this );
340 aName
= ByteString( aPortion
, RTL_TEXTENCODING_UTF8
);
// ... and may consist only of letters, digits, '_', '-' and '.'
343 BOOL bBroken
= FALSE
;
344 const sal_Char
* aBuf
= aName
.GetBuffer();
345 for ( nCount
= 0 ; !bBroken
&& nCount
< aName
.Len() ; nCount
++ )
347 bBroken
= ! ( ( aBuf
[nCount
] >= 'a' && aBuf
[nCount
] <= 'z' )
348 ||( aBuf
[nCount
] >= 'A' && aBuf
[nCount
] <= 'Z' )
349 ||( aBuf
[nCount
] >= '0' && aBuf
[nCount
] <= '9' )
350 ||( aBuf
[nCount
] == '_' )
351 ||( aBuf
[nCount
] == '-' )
352 ||( aBuf
[nCount
] == '.' )
358 rErrorList
.AddError( 25, "Found illegal character in Tag/Property name ", *this );
// emptiness check: any text in the portion is reported as displaced
368 if ( aPortion
.Len() )
370 rErrorList
.AddError( 25, ByteString("Found displaced characters '").Append(ByteString( aPortion
, RTL_TEXTENCODING_UTF8
)).Append("' in Tag "), *this );
377 nLastPos
= nCheckPos
;
379 // skip further blanks
380 if ( cDelim
== ' ' && aState
!= TC_INSIDE_STRING
)
381 while ( nLastPos
< aTokenString
.Len() && aTokenString
.GetChar( nLastPos
) == ' ')
384 nCheckPos
= aTokenString
.SearchChar( aDelims
.GetBuffer(), nLastPos
);
// anything other than a cleanly finished tag is a parsing error
386 if ( aState
!= TC_FINISHED
)
388 rErrorList
.AddError( 25, "Parsing error in Tag ", *this );
// Tells whether property aName (with value aValue) of this tag is relevant
// when a tag is compared with its translation; MatchesTranslation() reports
// missing, extra or differing properties only for relevant ones.
// Special-cased combinations of tag name and property:
//   alt / xml-lang, ahelp / visibility="visible", image / width or height.
// NOTE(review): confirm the result returned for each special case.
393 BOOL
TokenInfo::IsPropertyRelevant( const ByteString
&aName
, const String
&aValue
) const
395 if ( aTagName
.EqualsAscii( "alt" ) && aName
.Equals( "xml-lang" ) )
397 if ( aTagName
.EqualsAscii( "ahelp" ) && aName
.Equals( "visibility" ) && aValue
.EqualsAscii("visible") )
399 if ( aTagName
.EqualsAscii( "image" ) && (aName
.Equals( "width" ) || aName
.Equals( "height" )) )
// Checks whether aValue is an acceptable value for property aName of this
// tag. SplitTag() reports "has invalid value" when this yields FALSE.
// The only active rule: the "select" property of a "caseinline" tag must not
// be empty. The stricter checks were removed due to issue i56740.
405 BOOL
TokenInfo::IsPropertyValueValid( const ByteString
&aName
, const String
&aValue
) const
407 /* removed due to i56740
408 if ( aTagName.EqualsAscii( "switchinline" ) && aName.Equals( "select" ) )
410 return aValue.EqualsAscii("sys") ||
411 aValue.EqualsAscii("appl") ||
412 aValue.EqualsAscii("distrib");
414 if ( aTagName
.EqualsAscii( "caseinline" ) && aName
.Equals( "select" ) )
416 return /*!aValue.EqualsAscii("OS2") && removed due to i56740 */
417 !aValue
.EqualsAscii("");
420 // we don't know any better so we assume it to be OK
// Tells whether the value of property aName must be identical in source and
// translation; MatchesTranslation() compares values only for invariant
// properties.
// Special cases are the "name" and "href" properties of a "link" tag; for
// "href" the value is tested (case-insensitively) for the prefixes "http:",
// "https:" and "ftp:", i.e. for an external reference.
// NOTE(review): confirm the result returned for each special case.
424 BOOL
TokenInfo::IsPropertyInvariant( const ByteString
&aName
, const String
&aValue
) const
426 if ( aTagName
.EqualsAscii( "link" ) && aName
.Equals( "name" ) )
428 if ( aTagName
.EqualsAscii( "link" ) && aName
.Equals( "href" ) )
429 { // check for external reference
430 if ( aValue
.Copy( 0, 5 ).EqualsIgnoreCaseAscii( "http:" )
431 || aValue
.Copy( 0, 6 ).EqualsIgnoreCaseAscii( "https:" )
432 || aValue
.Copy( 0, 4 ).EqualsIgnoreCaseAscii( "ftp:" ) )
// Tells whether a differing value of property aName may be corrected
// automatically in the translation (see MatchesTranslation(), bFixTags).
// The tag/property pairs tested are: ahelp/hid, link/href, alt/id,
// variable/id, image/src and image/id.
440 BOOL
TokenInfo::IsPropertyFixable( const ByteString
&aName
) const
442 // name everything that is allowed to be fixed automatically here
443 if ( (aTagName
.EqualsAscii( "ahelp" ) && aName
.Equals( "hid" ))
444 || (aTagName
.EqualsAscii( "link" ) && aName
.Equals( "href" ))
445 || (aTagName
.EqualsAscii( "alt" ) && aName
.Equals( "id" ))
446 || (aTagName
.EqualsAscii( "variable" ) && aName
.Equals( "id" ))
447 || (aTagName
.EqualsAscii( "image" ) && aName
.Equals( "src" ))
448 || (aTagName
.EqualsAscii( "image" ) && aName
.Equals( "id" ) ))
// Compares this (reference) tag with the tag rInfo taken from a translation.
// Token id and tag name are compared first. Then every property of this tag
// is looked up in rInfo: relevant properties that are missing, and invariant
// properties whose values differ, are reported to rErrorList (error 25).
// With bFixTags set, a differing value of a fixable property is overwritten
// in rInfo with the reference value, rInfo is marked as fixed and a warning
// is added instead of an error. Finally, relevant properties that exist only
// in rInfo are reported as "Extra Property".
// NOTE(review): how bGenErrors gates the individual messages cannot be seen
// in all places - confirm before relying on it.
453 BOOL
TokenInfo::MatchesTranslation( TokenInfo
& rInfo
, BOOL bGenErrors
, ParserMessageList
&rErrorList
, BOOL bFixTags
) const
455 // check if tags are equal
456 // check if all existing properties are in the translation as well and
457 // whether they have a matching content (the same in most cases)
459 if ( nId
!= rInfo
.nId
)
462 if ( !aTagName
.Equals( rInfo
.aTagName
) )
465 // If one of the tags already has formatting errors it makes no sense to check here, so return right away
466 if ( bGenErrors
&& ( bIsBroken
|| rInfo
.bIsBroken
) )
469 StringHashMap::const_iterator iProp
;
// pass 1: properties of the reference tag
470 for( iProp
= aProperties
.begin() ; iProp
!= aProperties
.end(); ++iProp
)
472 if ( rInfo
.aProperties
.find( iProp
->first
) != rInfo
.aProperties
.end() )
474 if ( IsPropertyRelevant( iProp
->first
, iProp
->second
) || IsPropertyRelevant( iProp
->first
, rInfo
.aProperties
.find( iProp
->first
)->second
) )
476 if ( IsPropertyInvariant( iProp
->first
, iProp
->second
) )
478 if ( !rInfo
.aProperties
.find( iProp
->first
)->second
.Equals( iProp
->second
) )
482 if ( bFixTags
&& IsPropertyFixable( iProp
->first
) )
// overwrite the translated value with the reference value
484 rInfo
.aProperties
.find( iProp
->first
)->second
= iProp
->second
;
485 rInfo
.SetHasBeenFixed();
486 rErrorList
.AddWarning( 25, ByteString("Property '").Append(iProp
->first
).Append("': FIXED different value in Translation "), *this );
489 rErrorList
.AddError( 25, ByteString("Property '").Append(iProp
->first
).Append("': value different in Translation "), *this );
498 if ( IsPropertyRelevant( iProp
->first
, iProp
->second
) )
501 rErrorList
.AddError( 25, ByteString("Property '").Append(iProp
->first
).Append("' missing in Translation "), *this );
// pass 2: properties that exist only in the translated tag
506 for( iProp
= rInfo
.aProperties
.begin() ; iProp
!= rInfo
.aProperties
.end(); ++iProp
)
508 if ( aProperties
.find( iProp
->first
) == aProperties
.end() )
510 if ( IsPropertyRelevant( iProp
->first
, iProp
->second
) )
513 rErrorList
.AddError( 25, ByteString("Extra Property '").Append(iProp
->first
).Append("' in Translation "), rInfo
);
519 // if we reach here either
520 // the tags match completely or
521 // the tags match but not the properties and we generated errors for that
525 String
TokenInfo::GetTagName() const
// Rebuilds the textual form of the tag from its parts: the opening "\<",
// the tag name, every stored property as  name=\"value\"  and the closing
// "\>". A "/" is appended before the name and before the closing marker.
// NOTE(review): the conditions guarding the two "/" are not evident here -
// presumably end tag and self-closing tag respectively; confirm.
530 String
TokenInfo::MakeTag() const
533 aRet
.AppendAscii("\\<");
535 aRet
.AppendAscii("/");
536 aRet
.Append( GetTagName() );
537 StringHashMap::const_iterator iProp
;
// append all properties; property names are converted from UTF-8
539 for( iProp
= aProperties
.begin() ; iProp
!= aProperties
.end(); ++iProp
)
541 aRet
.AppendAscii(" ");
542 aRet
.Append( String( iProp
->first
, RTL_TEXTENCODING_UTF8
) );
543 aRet
.AppendAscii("=\\\"");
544 aRet
.Append( iProp
->second
);
545 aRet
.AppendAscii("\\\"");
548 aRet
.AppendAscii("/");
549 aRet
.AppendAscii("\\>");
// Appends a newly allocated ParserError, built from the error number, the
// error text and the tag it refers to, to the end of this list.
554 void ParserMessageList::AddError( USHORT nErrorNr
, ByteString aErrorText
, const TokenInfo
&rTag
)
556 Insert( new ParserError( nErrorNr
, aErrorText
, rTag
), LIST_APPEND
);
// Appends a newly allocated ParserWarning, built from the error number, the
// text and the tag it refers to, to the end of this list.
559 void ParserMessageList::AddWarning( USHORT nErrorNr
, ByteString aErrorText
, const TokenInfo
&rTag
)
561 Insert( new ParserWarning( nErrorNr
, aErrorText
, rTag
), LIST_APPEND
);
// Walks over all messages in the list and tests each one with IsError(),
// i.e. checks whether the list holds at least one error (as opposed to
// warnings only).
564 BOOL
ParserMessageList::HasErrors()
567 for ( i
=0 ; i
< Count() ; i
++ )
568 if ( GetObject( i
)->IsError() )
// Returns the ASCII tag text pName converted to a String.
575 String
GetName() const { return String::CreateFromAscii( pName
); };
// Table of the known tags: textual form and token id of each tag.
// The old style <#...> tags are commented out; the $[...] meta tags and the
// placeholders for common start/end tags remain active.
// The entry with TAG_UNKNOWN_TAG terminates the table; the lookups in
// SimpleParser::GetNextToken() and SimpleParser::GetLexem() stop there.
581 static const Tag aKnownTags
[] =
583 /* commenting oldstyle tags
584 // { "<#GROUP_FORMAT>", TAG_GROUP_FORMAT },
585 { "<#BOLD>", TAG_BOLDON },
586 { "<#/BOLD>", TAG_BOLDOFF },
587 { "<#ITALIC>", TAG_ITALICON },
588 { "<#/ITALIC>", TAG_ITALICOFF },
589 { "<#UNDER>", TAG_UNDERLINEON },
590 { "<#/UNDER>", TAG_UNDERLINEOFF },
592 // { "<#GROUP_NOTALLOWED>", TAG_GROUP_NOTALLOWED },
593 { "<#HELPID>", TAG_HELPID },
594 { "<#MODIFY>", TAG_MODIFY },
595 { "<#REFNR>", TAG_REFNR },
597 // { "<#GROUP_STRUCTURE>", TAG_GROUP_STRUCTURE },
598 { "<#NAME>", TAG_NAME },
599 { "<#HREF>", TAG_HREF },
600 { "<#AVIS>", TAG_AVIS },
601 { "<#AHID>", TAG_AHID },
602 { "<#AEND>", TAG_AEND },
604 { "<#TITEL>", TAG_TITEL },
605 { "<#KEY>", TAG_KEY },
606 { "<#INDEX>", TAG_INDEX },
608 { "<#REFSTART>", TAG_REFSTART },
610 { "<#GRAPHIC>", TAG_GRAPHIC },
611 { "<#NEXTVERSION>", TAG_NEXTVERSION },
613 // { "<#GROUP_SYSSWITCH>", TAG_GROUP_SYSSWITCH },
614 { "<#WIN>", TAG_WIN },
615 { "<#UNIX>", TAG_UNIX },
616 { "<#MAC>", TAG_MAC },
617 { "<#OS2>", TAG_OS2 },
619 // { "<#GROUP_PROGSWITCH>", TAG_GROUP_PROGSWITCH },
620 { "<#WRITER>", TAG_WRITER },
621 { "<#CALC>", TAG_CALC },
622 { "<#DRAW>", TAG_DRAW },
623 { "<#IMPRESS>", TAG_IMPRESS },
624 { "<#SCHEDULE>", TAG_SCHEDULE },
625 { "<#IMAGE>", TAG_IMAGE },
626 { "<#MATH>", TAG_MATH },
627 { "<#CHART>", TAG_CHART },
628 { "<#OFFICE>", TAG_OFFICE },
630 // { "<#TAG_GROUP_META>", TAG_GROUP_META },
631 { "$[officefullname]", TAG_OFFICEFULLNAME
},
632 { "$[officename]", TAG_OFFICENAME
},
633 { "$[officepath]", TAG_OFFICEPATH
},
634 { "$[officeversion]", TAG_OFFICEVERSION
},
635 { "$[portalname]", TAG_PORTALNAME
},
636 { "$[portalfullname]", TAG_PORTALFULLNAME
},
637 { "$[portalpath]", TAG_PORTALPATH
},
638 { "$[portalversion]", TAG_PORTALVERSION
},
639 { "$[portalshortname]", TAG_PORTALSHORTNAME
},
640 /* commenting oldstyle tags
641 // { "<#TAG_GROUP_SINGLE>", TAG_GROUP_SINGLE },
642 { "<#REFINSERT>", TAG_REFINSERT },
644 // { "<#GROUP_MULTI>", TAG_GROUP_MULTI },
645 { "<#END>", TAG_END },
646 { "<#ELSE>", TAG_ELSE },
647 { "<#VERSIONEND>", TAG_VERSIONEND },
648 { "<#ENDGRAPHIC>", TAG_ENDGRAPHIC },*/
649 { "<Common Tag>", TAG_COMMONSTART
},
650 { "</Common Tag>", TAG_COMMONEND
},
652 { "<no more tags>", TAG_NOMORETAGS
},
653 { "", TAG_UNKNOWN_TAG
},
// Constructs the tokenizer with no pending look-ahead tag: aNextTag starts
// out as a TAG_NOMORETAGS token without a valid position.
657 SimpleParser::SimpleParser()
659 , aNextTag( TAG_NOMORETAGS
, TOK_INVALIDPOS
)
// Prepares the tokenizer for scanning the text PaSource; a pending
// look-ahead tag from a previous run is discarded.
663 void SimpleParser::Parse( String PaSource
)
668 aNextTag
= TokenInfo( TAG_NOMORETAGS
, TOK_INVALIDPOS
);
// Delivers the next token of the source text.
// A pending look-ahead tag (aNextTag) is consumed first. Otherwise the next
// token string is fetched with GetNextTokenString(); an empty string means
// "no more tags" and yields a TAG_NOMORETAGS token.
// Token strings starting with "\<" are checked for properly paired \" and
// become TAG_COMMONEND or TAG_COMMONSTART tokens, depending on whether the
// first non-blank character after "\<" is a '/'. If the last non-blank
// character before the closing "\>" is a '/', a matching end tag
// "\</name\>" is stored in aNextTag.
// All other token strings are looked up in aKnownTags; unknown ones become
// TAG_UNKNOWN_TAG tokens carrying the token text. The resulting token is
// appended to aTokenList. Problems are reported to rErrorList (error 24).
672 TokenInfo
SimpleParser::GetNextToken( ParserMessageList
&rErrorList
)
675 USHORT nTokenStartPos
= 0;
676 if ( aNextTag
.nId
!= TAG_NOMORETAGS
)
679 aNextTag
= TokenInfo( TAG_NOMORETAGS
, TOK_INVALIDPOS
);
683 aLastToken
= GetNextTokenString( rErrorList
, nTokenStartPos
);
684 if ( aLastToken
.Len() == 0 )
685 return TokenInfo( TAG_NOMORETAGS
, TOK_INVALIDPOS
);
687 // do we have a \< ... \> style tag?
688 if ( aLastToken
.Copy(0,2).EqualsAscii( "\\<" ) )
690 // check for paired \" \"
692 USHORT nQuotePos
= 0;
693 USHORT nQuotedQuotesPos
= aLastToken
.SearchAscii( "\\\"" );
694 USHORT nQuotedBackPos
= aLastToken
.SearchAscii( "\\\\" ); // this is only to kick out quoted backslashes
695 while ( nQuotedQuotesPos
!= STRING_NOTFOUND
)
697 if ( nQuotedBackPos
<= nQuotedQuotesPos
)
698 nQuotePos
= nQuotedBackPos
+2;
701 nQuotePos
= nQuotedQuotesPos
+2;
704 nQuotedQuotesPos
= aLastToken
.SearchAscii( "\\\"", nQuotePos
);
705 nQuotedBackPos
= aLastToken
.SearchAscii( "\\\\", nQuotePos
); // this is only to kick out quoted backslashes
709 rErrorList
.AddError( 24, "Missing quotes ( \\\" ) in Tag", TokenInfo( TAG_UNKNOWN_TAG
, nTokenStartPos
, aLastToken
) );
712 // check if we have an end-tag or a start-tag
713 USHORT nNonBlankStartPos
,nNonBlankEndPos
;
// position 2 is the first character after the leading "\<"
714 nNonBlankStartPos
= 2;
715 while ( aLastToken
.GetChar(nNonBlankStartPos
) == ' ' )
717 if ( aLastToken
.GetChar(nNonBlankStartPos
) == '/' )
718 aResult
= TokenInfo( TAG_COMMONEND
, nTokenStartPos
, aLastToken
, rErrorList
);
721 aResult
= TokenInfo( TAG_COMMONSTART
, nTokenStartPos
, aLastToken
, rErrorList
);
// Len()-3 is the last character before the trailing "\>"
722 nNonBlankEndPos
= aLastToken
.Len() -3;
723 while ( aLastToken
.GetChar(nNonBlankEndPos
) == ' ' )
725 if ( aLastToken
.GetChar( nNonBlankEndPos
) == '/' )
726 aNextTag
= TokenInfo( TAG_COMMONEND
, nTokenStartPos
, String::CreateFromAscii("\\</").Append(aResult
.GetTagName()).AppendAscii("\\>"), rErrorList
);
// not a \< ... \> tag: search the table of known tags
732 while ( aKnownTags
[i
].nTag
!= TAG_UNKNOWN_TAG
&&
733 aLastToken
!= aKnownTags
[i
].GetName() )
735 aResult
= TokenInfo( aKnownTags
[i
].nTag
, nTokenStartPos
);
739 if ( aResult
.nId
== TAG_UNKNOWN_TAG
)
740 aResult
= TokenInfo( TAG_UNKNOWN_TAG
, nTokenStartPos
, aLastToken
);
741 aTokenList
.Insert( aResult
, LIST_APPEND
);
// Finds the text of the next token in aSource, searching from nPos for
// "$[" (meta tags), "\<" (common tags) and "\\" (quoted backslashes).
// Returns an empty String when neither "$[" nor "\<" is found. A quoted
// backslash that comes first is skipped and the search restarts behind it.
// A "$[" token extends to the next "]"; a "\<" token extends to the next
// "\>" that is not preceded by a quoted backslash. On success rTagStartPos
// receives the start position of the token. An incomplete token is skipped
// and the search continues; a "\<" without "\>" is additionally reported to
// rErrorList as error 24.
745 String
SimpleParser::GetNextTokenString( ParserMessageList
&rErrorList
, USHORT
&rTagStartPos
)
747 // USHORT nStyle1StartPos = aSource.SearchAscii( "<#", nPos );
748 USHORT nStyle2StartPos
= aSource
.SearchAscii( "$[", nPos
);
749 USHORT nStyle3StartPos
= aSource
.SearchAscii( "\\<", nPos
);
750 USHORT nStyle4StartPos
= aSource
.SearchAscii( "\\\\", nPos
); // this is only to kick out quoted backslashes
754 /* removing since a \<... is not likely
755 // check if the tag starts with a letter to avoid things like <> <= ... >
756 while ( STRING_NOTFOUND != nStyle3StartPos && !( aSource.Copy( nStyle3StartPos+2, 1 ).IsAlphaAscii() || aSource.GetChar( nStyle3StartPos+2 ) == '/' ) )
757 nStyle3StartPos = aSource.SearchAscii( "\\<", nStyle3StartPos+1 );
759 if ( STRING_NOTFOUND
== nStyle2StartPos
&& STRING_NOTFOUND
== nStyle3StartPos
)
760 return String(); // no more tokens
762 if ( nStyle4StartPos
< nStyle2StartPos
&& nStyle4StartPos
<= nStyle3StartPos
) // <= to make sure \\ is always handled first
763 { // Skip quoted Backslash
764 nPos
= nStyle4StartPos
+2;
765 return GetNextTokenString( rErrorList
, rTagStartPos
);
768 /* if ( nStyle1StartPos < nStyle2StartPos && nStyle1StartPos <= nStyle3StartPos ) // <= to make sure our special tags are recognized before all others
769 { // test for <# ... > style tokens
770 USHORT nEndPos = aSource.SearchAscii( ">", nStyle1StartPos );
771 if ( nEndPos == STRING_NOTFOUND )
772 { // Token is incomplete. Skip start and search for better ones
773 nPos = nStyle1StartPos +2;
774 return GetNextTokenString( rErrorList, rTagStartPos );
777 rTagStartPos = nStyle1StartPos;
778 return aSource.Copy( nStyle1StartPos, nEndPos-nStyle1StartPos +1 ).ToUpperAscii();
780 else*/ if ( nStyle2StartPos
< nStyle3StartPos
)
781 { // test for $[ ... ] style tokens
782 USHORT nEndPos
= aSource
.SearchAscii( "]", nStyle2StartPos
);
783 if ( nEndPos
== STRING_NOTFOUND
)
784 { // Token is incomplete. Skip start and search for better ones
785 nPos
= nStyle2StartPos
+2;
786 return GetNextTokenString( rErrorList
, rTagStartPos
);
789 rTagStartPos
= nStyle2StartPos
;
// +1 so that the closing "]" is part of the token
790 return aSource
.Copy( nStyle2StartPos
, nEndPos
-nStyle2StartPos
+1 );
793 { // test for \< ... \> style tokens
794 USHORT nEndPos
= aSource
.SearchAscii( "\\>", nStyle3StartPos
);
795 USHORT nQuotedBackPos
= aSource
.SearchAscii( "\\\\", nStyle3StartPos
); // this is only to kick out quoted backslashes
// a quoted backslash before the end marker may hide a "\>"; search again
// behind it
796 while ( nQuotedBackPos
<= nEndPos
&& nQuotedBackPos
!= STRING_NOTFOUND
)
798 nEndPos
= aSource
.SearchAscii( "\\>", nQuotedBackPos
+2);
799 nQuotedBackPos
= aSource
.SearchAscii( "\\\\", nQuotedBackPos
+2 ); // this is only to kick out quoted backslashes
801 if ( nEndPos
== STRING_NOTFOUND
)
802 { // Token is incomplete. Skip start and search for better ones
803 nPos
= nStyle3StartPos
+2;
// NOTE(review): sTmp is not used by any statement visible here.
804 ByteString
sTmp( "Tag Start '\\<' without Tag End '\\>': " );
// NOTE(review): nStyle3StartPos-10 becomes negative when the tag starts
// within the first 10 characters of aSource - confirm that Copy() copes
// with such an index.
805 rErrorList
.AddError( 24, "Tag Start '\\<' without Tag End '\\>'", TokenInfo( TAG_UNKNOWN_TAG
, nStyle3StartPos
, aSource
.Copy( nStyle3StartPos
-10, 20 ) ) );
806 return GetNextTokenString( rErrorList
, rTagStartPos
);
808 // check for paired quoted " --> \"sometext\"
811 rTagStartPos
= nStyle3StartPos
;
// +2 so that the closing "\>" is part of the token
812 return aSource
.Copy( nStyle3StartPos
, nEndPos
-nStyle3StartPos
+2 );
// Returns the textual form of a token: the token's own text if it has one,
// otherwise the name found in aKnownTags for the token id (the search stops
// at the terminating TAG_UNKNOWN_TAG entry, whose name is empty).
816 String
SimpleParser::GetLexem( TokenInfo
const &aToken
)
818 if ( aToken
.aTokenString
.Len() )
819 return aToken
.aTokenString
;
823 while ( aKnownTags
[i
].nTag
!= TAG_UNKNOWN_TAG
&&
824 aKnownTags
[i
].nTag
!= aToken
.nId
)
827 return aKnownTags
[i
].GetName();
831 TokenParser::TokenParser()
// Checks the tag structure of the text aCode.
// The scanner is set up with the text, the first token is fetched and the
// platform/application case flags are reset before the grammar is run.
// If tokens are left over afterwards, an error specific to the remaining
// token is reported through ParseError().
// NOTE(review): pList is presumably stored as pErrorList - confirm.
835 void TokenParser::Parse( const String
&aCode
, ParserMessageList
* pList
)
839 // initialize the scanner
840 aParser
.Parse( aCode
);
842 // fetch the first symbol
843 aTag
= aParser
.GetNextToken( *pErrorList
);
847 bPfCaseActive
= FALSE
;
848 bAppCaseActive
= FALSE
;
852 // execute the start production
855 // Not the whole chain has been processed, but no error has occurred
856 // so far
857 // => an opening tag has been forgotten
858 if ( aTag
.nId
!= TAG_NOMORETAGS
)
864 ParseError( 3, "Extra Tag <#END>. Switch or <#HREF> expected.", aTag
);
869 ParseError( 4, "<#BOLD> expected before <#/BOLD>.", aTag
);
874 ParseError( 5, "<#ITALIC> expected before <#/ITALIC>.", aTag
);
877 case TAG_UNDERLINEOFF
:
879 ParseError( 17, "<#UNDER> expected before <#/UNDER>.", aTag
);
882 /* case TAG_MISSPARENTHESIS:
884 ParseError( 14, "missing closing parenthesis '>'", aTag );
889 ParseError( 5, "Extra Tag <#AEND>. <#AVIS> or <#AHID> expected.", aTag
);
894 ParseError( 16, "Application-tag or platform-tag expected before <#ELSE>.", aTag
);
897 case TAG_UNKNOWN_TAG
:
899 ParseError( 6, "unknown Tag", aTag
);
904 ParseError( 6, "unexpected Tag", aTag
);
// Grammar production for a paragraph: dispatches on the current tag.
// bPfCaseActive and bAppCaseActive are set while a platform case or an
// application case is being processed, so that such cases cannot be nested.
911 void TokenParser::Paragraph()
916 case TAG_NEXTVERSION
:
935 case TAG_OFFICEFULLNAME
:
938 case TAG_OFFICEVERSION
:
940 case TAG_PORTALFULLNAME
:
942 case TAG_PORTALVERSION
:
943 case TAG_PORTALSHORTNAME
:
957 case TAG_UNDERLINEON
:
958 case TAG_COMMONSTART
:
980 if ( ! bPfCaseActive
)
982 // PfCases must not be nested:
983 bPfCaseActive
= TRUE
;
986 // now another PfCase may follow:
987 bPfCaseActive
= FALSE
;
1002 if ( !bAppCaseActive
)
1004 // AppCases must not be nested:
1005 bAppCaseActive
= TRUE
;
1008 // now AppCases may follow again:
1009 bAppCaseActive
= FALSE
;
1015 // case TAG_BOLDOFF, TAG_ITALICOFF, TAG_BUNDERLINE, TAG_END
1016 // do nothing because of the epsilon production
// Grammar production for a platform case (see the production rule below).
// Reports error 8 when neither <#ELSE>, <#END> nor a platform tag follows.
1020 void TokenParser::PfCase()
1024 //PfCase -> PfCaseBegin Paragraph (PfCase | PfCaseEnd)
1028 // a PfCase production is active now:
1041 case TAG_MAC
: //First (PfBegin)
1047 ParseError( 8, "<#ELSE> or <#END> or platform-tag expected.", aTag
);
1049 // clear the remembered tags again for the next PfCase:
// Consumes the platform tag that opens a platform case.
// Each platform tag may occur only once per platform case: the tag's bit is
// recorded in nPfCaseOptions, and a repeated tag is reported as error 9.
1053 void TokenParser::PfCaseBegin()
1062 // the token must not have occurred yet in the
1063 // current platform case:
1064 if ( !HAS_FLAG( nPfCaseOptions
, TAG_NOGROUP( aTag
.nId
) ) )
1066 SET_FLAG( nPfCaseOptions
, TAG_NOGROUP( aTag
.nId
) );
1067 match( aTag
, aTag
);
1070 ParseError( 9, "Tag defined twice in the same platform-case", aTag
);
// Grammar production for an application case (see the production rule
// below). Reports error 1 when neither <#ELSE>, <#END> nor an application
// tag follows; nAppCaseOptions is cleared for the next application case.
1076 void TokenParser::AppCase()
1080 //AppCase -> AppCaseBegin Paragraph (AppCase | AppCaseEnd)
1103 case TAG_SCHEDULE
: //First (AppBegin)
1109 ParseError( 1, "<#ELSE> or <#END> or application-case-tag expected.", aTag
);
1112 // clear the remembered tags again for the next AppCase:
1113 nAppCaseOptions
= 0;
// Consumes the application tag that opens an application case.
// Each application tag may occur only once per application case: the tag's
// bit is recorded in nAppCaseOptions, and a repeated tag is reported as
// error 13.
1116 void TokenParser::AppCaseBegin()
1130 // the token must not have occurred yet in the
1131 // current application case:
1132 if ( !HAS_FLAG( nAppCaseOptions
, TAG_NOGROUP( aTag
.nId
) ) )
1134 SET_FLAG( nAppCaseOptions
, TAG_NOGROUP( aTag
.nId
) );
1135 match( aTag
, aTag
);
1138 ParseError( 13, "Tag defined twice in the same application-case.", aTag
);
// Grammar production for the end of a platform or application case (see
// the production rule below). Reports error 2 when neither <#ELSE> nor
// <#END> is found.
1144 void TokenParser::CaseEnd()
1147 //CaseEnd -> <#ELSE> Paragraph <#END> | <#END>
1153 match( aTag
, TAG_ELSE
);
1155 match( aTag
, TAG_END
);
1160 match( aTag
, TAG_END
);
1164 ParseError( 2, "<#ELSE> or <#END> expected.", aTag
);
// Grammar production for a stand-alone tag: consumes a help id tag or one
// of the $[office...] / $[portal...] meta tags. Any other tag is reported
// as error 15.
1168 void TokenParser::SimpleTag()
1175 match( aTag
, TAG_HELPID
);
1178 case TAG_OFFICEFULLNAME
:
1179 case TAG_OFFICENAME
:
1180 case TAG_OFFICEPATH
:
1181 case TAG_OFFICEVERSION
:
1182 case TAG_PORTALNAME
:
1183 case TAG_PORTALFULLNAME
:
1184 case TAG_PORTALPATH
:
1185 case TAG_PORTALVERSION
:
1186 case TAG_PORTALSHORTNAME
:
1190 match( aTag
, aTag
);
1194 ParseError( 15, "[<#SimpleTag>] expected.", aTag
);
// Grammar production for paired tags: bold, italic and underline on/off as
// well as common start/end tags. The opening tag is matched first, then the
// corresponding closing tag. For common tags a copy of the opening tag with
// the id changed to TAG_COMMONEND serves as the expected closing tag.
// Any other tag is reported as error 10.
1198 void TokenParser::TagPair()
1204 match( aTag
, TAG_BOLDON
);
1206 match( aTag
, TAG_BOLDOFF
);
1211 match( aTag
, TAG_ITALICON
);
1213 match( aTag
, TAG_ITALICOFF
);
1216 case TAG_UNDERLINEON
:
1218 match( aTag
, TAG_UNDERLINEON
);
1220 match( aTag
, TAG_UNDERLINEOFF
);
1223 case TAG_COMMONSTART
:
1225 //remember tag so we can give the original tag in case of an error
1226 TokenInfo
aEndTag( aTag
);
1227 aEndTag
.nId
= TAG_COMMONEND
;
1228 match( aTag
, TAG_COMMONSTART
);
1230 match( aTag
, aEndTag
);
1234 ParseError( 10, "<#BOLD>, <#ITALIC>, <#UNDER> expected.", aTag
);
// Grammar production for reference tags. The bit of the tag currently being
// processed is kept in nActiveRefTypes to detect repetition or nesting:
//  - graphic / next-version tags are closed by TAG_ENDGRAPHIC or
//    TAG_VERSIONEND and are allowed only once per paragraph (the bit is
//    never reset);
//  - AHID / AVIS tags are closed by TAG_AEND and must not be nested;
//  - HREF / NAME / KEY style tags are closed by TAG_END (TAG_NAME has no end
//    tag) and must not be nested.
// Violations are reported as error 11, any other tag as error 12.
1239 void TokenParser::TagRef()
1244 case TAG_NEXTVERSION
:
1246 if ( !HAS_FLAG( nActiveRefTypes
, TAG_NOGROUP( aTag
.nId
) ) )
1248 TokenId aThisToken
= aTag
.nId
;
1249 SET_FLAG( nActiveRefTypes
, TAG_NOGROUP( aThisToken
) );
1250 match( aTag
, aTag
);
1252 if ( aThisToken
== TAG_GRAPHIC
)
1253 match( aTag
, TAG_ENDGRAPHIC
);
1255 match( aTag
, TAG_VERSIONEND
);
1256 // don't reset since allowed only once per paragraph
1257 // RESET_FLAG( nActiveRefTypes, TAG_NOGROUP( aThisToken ) );
1261 ParseError( 11, "Tags <#GRAPHIC>,<#NEXTVERSION> allowed only once per paragraph at", aTag
);
1268 if ( !HAS_FLAG( nActiveRefTypes
, TAG_NOGROUP( aTag
.nId
) ) )
1270 TokenId aThisToken
= aTag
.nId
;
1271 SET_FLAG( nActiveRefTypes
, TAG_NOGROUP( aThisToken
) );
1272 match( aTag
, aTag
);
1274 match( aTag
, TAG_AEND
);
1275 RESET_FLAG( nActiveRefTypes
, TAG_NOGROUP( aThisToken
) );
1279 ParseError( 11, "Nested <#AHID>,<#AVIS> not allowed.", aTag
);
1294 if ( !HAS_FLAG( nActiveRefTypes
, TAG_NOGROUP( aTag
.nId
) ) )
1296 TokenId aThisToken
= aTag
.nId
;
1297 match( aTag
, aTag
);
1298 if ( aThisToken
!= TAG_NAME
)
1299 { // TAG_NAME has no TAG_END
1300 SET_FLAG( nActiveRefTypes
, TAG_NOGROUP( aThisToken
) );
1302 match( aTag
, TAG_END
);
1303 RESET_FLAG( nActiveRefTypes
, TAG_NOGROUP( aThisToken
) );
1308 ParseError( 11, "Nested <#HREF>,<#NAME> or <#KEY> not allowed.", aTag
);
1313 ParseError( 12, "<#HREF>,<#NAME> or <#KEY> expected.", aTag
);
// Convenience overload: wraps the expected token id into a TokenInfo
// without a valid position and forwards to match( TokenInfo, TokenInfo ).
1317 BOOL
TokenParser::match( const TokenInfo
&aCurrentToken
, const TokenId
&aExpectedToken
)
1319 return match( aCurrentToken
, TokenInfo( aExpectedToken
, TOK_INVALIDPOS
) );
// Compares the current token with the expected one.
// The tokens match when their ids are equal; for TAG_COMMONEND the tag
// names must be equal as well. On a match the next token is fetched into
// aTag. Otherwise an "Expected Symbol" error (number 7) is raised through
// ParseError(): against the expected token when the input is exhausted,
// else against the current token with the expected lexem appended to the
// message. For an expected end tag the token text is prefixed with
// "Close tag for ".
// NOTE(review): the returned values are not evident here - presumably TRUE
// on a match and FALSE otherwise; confirm.
1322 BOOL
TokenParser::match( const TokenInfo
&aCurrentToken
, const TokenInfo
&rExpectedToken
)
// work on a copy since the token text may be modified for the error message
1324 TokenInfo
aExpectedToken( rExpectedToken
);
1325 if ( aCurrentToken
.nId
== aExpectedToken
.nId
)
1327 if ( ( aCurrentToken
.nId
== TAG_COMMONEND
1328 && aCurrentToken
.GetTagName().Equals( aExpectedToken
.GetTagName() ) )
1329 || aCurrentToken
.nId
!= TAG_COMMONEND
)
1331 aTag
= aParser
.GetNextToken( *pErrorList
);
1336 if ( aExpectedToken
.nId
== TAG_COMMONEND
)
1338 aExpectedToken
.aTokenString
.Insert( String::CreateFromAscii( "Close tag for " ), 0 );
1341 ByteString
sTmp( "Expected Symbol" );
1342 if ( aCurrentToken
.nId
== TAG_NOMORETAGS
)
1344 ParseError( 7, sTmp
, aExpectedToken
);
1349 sTmp
+= ByteString( aParser
.GetLexem( aExpectedToken
), RTL_TEXTENCODING_UTF8
);
1351 ParseError( 7, sTmp
, aCurrentToken
);
// Records an error for the tag rTag in the error list and then advances to
// the next token, so that parsing continues behind the faulty tag.
1356 void TokenParser::ParseError( USHORT nErrNr
, ByteString aErrMsg
, const TokenInfo
&rTag
)
1358 pErrorList
->AddError( nErrNr
, aErrMsg
, rTag
);
1360 // skip the faulty tag
1361 aTag
= aParser
.GetNextToken( *pErrorList
);
// Builds a parser message from an error number, a message text and the tag
// it refers to. The text is extended by ": " and the tag's lexem, followed
// by " at end of line " for a TAG_NOMORETAGS token or by " at Position "
// and the position when the tag has a valid position. The tag's position
// and the length of its lexem are stored in nTagBegin and nTagLength.
1365 ParserMessage::ParserMessage( USHORT PnErrorNr
, ByteString PaErrorText
, const TokenInfo
&rTag
)
1366 : nErrorNr( PnErrorNr
)
1367 , aErrorText( PaErrorText
)
1371 String
aLexem( SimpleParser::GetLexem( rTag
) );
1372 aErrorText
.Append(": ");
1373 aErrorText
+= ByteString( aLexem
, RTL_TEXTENCODING_UTF8
);
1374 if ( rTag
.nId
== TAG_NOMORETAGS
)
1375 aErrorText
.Append(" at end of line ");
1376 else if ( rTag
.nPos
!= TOK_INVALIDPOS
)
1378 aErrorText
.Append(" at Position ");
1379 aErrorText
.Append( ByteString::CreateFromInt32( rTag
.nPos
) );
1381 nTagBegin
= rTag
.nPos
;
1382 nTagLength
= aLexem
.Len();
// Constructs an error message; all arguments are passed on to ParserMessage.
1385 ParserError::ParserError( USHORT ErrorNr
, ByteString ErrorText
, const TokenInfo
&rTag
)
1386 : ParserMessage( ErrorNr
, ErrorText
, rTag
)
// Constructs a warning message; all arguments are passed on to
// ParserMessage.
1389 ParserWarning::ParserWarning( USHORT ErrorNr
, ByteString ErrorText
, const TokenInfo
&rTag
)
1390 : ParserMessage( ErrorNr
, ErrorText
, rTag
)
// Decides whether a tag has to be present in the translation as well.
// The first condition covers program-switch tags, the listed reference tags
// and meta tags whose id bits are already contained in aMetaTokens; the ids
// of meta tags are accumulated in aMetaTokens.
// Common start/end tags are mandatory unless their name is one of
// "comment", "bookmark_value", "emph", "item" or "br" (case-insensitive).
// NOTE(review): confirm the result returned for the first condition.
1393 BOOL
LingTest::IsTagMandatory( TokenInfo
const &aToken
, TokenId
&aMetaTokens
)
1395 TokenId aTokenId
= aToken
.nId
;
1396 TokenId aTokenGroup
= TAG_GROUP( aTokenId
);
1397 if ( TAG_GROUP_PROGSWITCH
== aTokenGroup
1398 || TAG_REFINSERT
== aTokenId
1399 || TAG_REFSTART
== aTokenId
1400 || TAG_NAME
== aTokenId
1401 || TAG_HREF
== aTokenId
1402 || TAG_AVIS
== aTokenId
1403 || TAG_AHID
== aTokenId
1404 || TAG_GRAPHIC
== aTokenId
1405 || TAG_NEXTVERSION
== aTokenId
1406 || ( TAG_GROUP_META
== aTokenGroup
&& (aMetaTokens
& aTokenId
) == aTokenId
) )
// remember which meta tags have been seen
1408 if ( TAG_GROUP_META
== aTokenGroup
)
1409 aMetaTokens
|= aTokenId
;
1412 else if ( TAG_COMMONSTART
== aTokenId
1413 || TAG_COMMONEND
== aTokenId
)
1415 String aTagName
= aToken
.GetTagName();
1416 return !(aTagName
.EqualsIgnoreCaseAscii( "comment" )
1417 || aTagName
.EqualsIgnoreCaseAscii( "bookmark_value" )
1418 || aTagName
.EqualsIgnoreCaseAscii( "emph" )
1419 || aTagName
.EqualsIgnoreCaseAscii( "item" )
1420 || aTagName
.EqualsIgnoreCaseAscii( "br" ) );
// Compares the tags of a reference text with those of its translation and
// collects the differences in aCompareWarningList. Steps:
//  1. delete the messages of a previous run;
//  2. mark all non-mandatory tags in both lists as done;
//  3. pair up tags that match (without generating errors) and mark them
//     as done;
//  4. count the tags still open on each side; automatic fixing is considered
//     only in really simple cases (see nTagCount);
//  5. pair up the remaining tags again, now generating errors for differing
//     properties and, if allowed, fixing them;
//  6. report what is still left as "Missing Tag" (error 20) or "Extra Tag"
//     (error 21);
//  7. reset the done marks of all tags in both lists.
1425 void LingTest::CheckTags( TokenList
&aReference
, TokenList
&aTestee
, BOOL bFixTags
)
1428 // Clean old Warnings
1429 while ( aCompareWarningList
.Count() )
1431 delete aCompareWarningList
.GetCurObject();
1432 aCompareWarningList
.Remove();
1435 /* in xml tags, do not require the following tags
1443 // filter uninteresting Tags
1444 TokenId aMetaTokens
= 0;
1445 for ( i
=0 ; i
< aReference
.Count() ; i
++ )
1447 if ( !IsTagMandatory( aReference
.GetObject( i
), aMetaTokens
) )
1448 aReference
.GetObject( i
).SetDone();
1452 for ( i
=0 ; i
< aTestee
.Count() ; i
++ )
1454 if ( !IsTagMandatory( aTestee
.GetObject( i
), aMetaTokens
) )
1455 aTestee
.GetObject( i
).SetDone();
1458 // remove all matching tags
1459 for ( i
=0 ; i
< aReference
.Count() ; i
++ )
1461 if ( aReference
.GetObject( i
).IsDone() )
1464 BOOL bTagFound
= FALSE
;
1465 for ( j
=0 ; j
< aTestee
.Count() && !bTagFound
; j
++ )
1467 if ( aTestee
.GetObject( j
).IsDone() )
1470 if ( aReference
.GetObject( i
).MatchesTranslation( aTestee
.GetObject( j
), FALSE
, aCompareWarningList
) )
1472 aReference
.GetObject( i
).SetDone();
1473 aTestee
.GetObject( j
).SetDone();
1479 BOOL bCanFix
= TRUE
;
1483 // we fix only if it's a really simple case
1484 USHORT nTagCount
= 0;
1485 for ( i
=0 ; i
< aReference
.Count() ; i
++ )
1486 if ( !aReference
.GetObject( i
).IsDone() )
1488 if ( nTagCount
> 1 )
1492 for ( i
=0 ; i
< aTestee
.Count() ; i
++ )
1493 if ( !aTestee
.GetObject( i
).IsDone() )
1495 if ( nTagCount
> 1 )
1499 // generate errors for tags that have differing attributes
1500 for ( i
=0 ; i
< aReference
.Count() ; i
++ )
1502 if ( aReference
.GetObject( i
).IsDone() )
1505 BOOL bTagFound
= FALSE
;
1506 for ( j
=0 ; j
< aTestee
.Count() && !bTagFound
; j
++ )
1508 if ( aTestee
.GetObject( j
).IsDone() )
1511 if ( aReference
.GetObject( i
).MatchesTranslation( aTestee
.GetObject( j
), TRUE
, aCompareWarningList
, bCanFix
&& bFixTags
) )
1513 aReference
.GetObject( i
).SetDone();
1514 aTestee
.GetObject( j
).SetDone();
1520 // list remaining tags as errors
1521 for ( i
=0 ; i
< aReference
.Count() ; i
++ )
1523 if ( aReference
.GetObject( i
).IsDone() )
1526 aCompareWarningList
.AddError( 20, "Missing Tag in Translation", aReference
.GetObject( i
) );
1528 for ( i
=0 ; i
< aTestee
.Count() ; i
++ )
1530 if ( aTestee
.GetObject( i
).IsDone() )
1533 aCompareWarningList
.AddError( 21, "Extra Tag in Translation", aTestee
.GetObject( i
) );
1536 for ( i
=0 ; i
< aReference
.Count() ; i
++ )
1537 aReference
.GetObject( i
).SetDone( FALSE
);
1539 for ( i
=0 ; i
< aTestee
.Count() ; i
++ )
1540 aTestee
.GetObject( i
).SetDone( FALSE
);
// Parses the text of the reference line with the reference parser; messages
// go to the line's own message list.
1543 void LingTest::CheckReference( GSILine
*aReference
)
1545 aReferenceParser
.Parse( aReference
->GetUText(), aReference
->GetMessageList() );
// Parses the text of the translated line and, if a source line is available
// (bHasSourceLine), compares its tags with those of the reference via
// CheckTags(). Tokens that were fixed during the comparison are then
// replaced in the text by their regenerated form (MakeTag()), and the fixed
// text is written back to the line, which is marked as fixed.
// NOTE(review): the conditions guarding the write-back (bFixTags,
// bFixesDone) are not evident here - confirm.
1548 void LingTest::CheckTestee( GSILine
*aTestee
, BOOL bHasSourceLine
, BOOL bFixTags
)
1550 aFixedTestee
= aTestee
->GetUText();
1551 aTesteeParser
.Parse( aFixedTestee
, aTestee
->GetMessageList() );
1553 if ( bHasSourceLine
)
1554 CheckTags( aReferenceParser
.GetTokenList(), aTesteeParser
.GetTokenList(), bFixTags
);
1558 TokenList
& aTesteeTokens
= aTesteeParser
.GetTokenList();
1559 BOOL bFixesDone
= FALSE
;
1560 // count backwards to allow replacing from right to left
// NOTE(review): the condition i>=0 only terminates the loop if i is of a
// signed type - confirm against its declaration.
1562 for ( i
=aTesteeTokens
.Count()-1 ; i
>=0 ; i
-- )
1564 if ( aTesteeTokens
.GetObject( i
).HasBeenFixed() )
// replace the original token text by the regenerated tag
1567 aFixedTestee
.Replace( aTesteeTokens
.GetObject( i
).nPos
, aTesteeTokens
.GetObject( i
).aTokenString
.Len(), aTesteeTokens
.GetObject( i
).MakeTag() );
1572 aTestee
->SetUText( aFixedTestee
);
1573 aTestee
->SetFixed();