1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
19 #include "sal/config.h"
21 #include <iterator> /* std::iterator*/
25 #include <sal/alloca.h>
29 #include "xmlparse.hxx"
32 #include <osl/mutex.hxx>
33 #include <osl/thread.hxx>
34 #include <osl/process.h>
35 #include <rtl/strbuf.hxx>
36 #include <unicode/regex.h>
38 using namespace U_ICU_NAMESPACE
;
42 #define XML_LANG "xml-lang"
44 #define OLDREF "oldref"
// Constructs a child node below the parent pPar.
// The visible statement registers this node with the parent through
// m_pParent->AddChild( this ).
// NOTE(review): the initialisation of m_pParent and any null check on it are
// not visible in this excerpt - confirm against the full file.
50 XMLChildNode::XMLChildNode( XMLParentNode
*pPar
)
54 m_pParent
->AddChild( this );
// Copy constructor: the copy takes over the parent pointer stored in rObj.
// NOTE(review): no AddChild call is visible here, so the copy does not appear
// to be registered in the child list of that parent - confirm.
58 XMLChildNode::XMLChildNode( const XMLChildNode
& rObj
)
60 m_pParent(rObj
.m_pParent
)
// Copy assignment: overwrites m_pParent with the parent pointer of rObj.
// NOTE(review): a self-assignment guard and the return statement are not
// visible in this excerpt.
64 XMLChildNode
& XMLChildNode::operator=(const XMLChildNode
& rObj
)
68 m_pParent
=rObj
.m_pParent
;
74 // class XMLParentNode
// Destructor: releases all owned children via RemoveAndDeleteAllChildren().
// NOTE(review): deletion of the child list object itself is not visible here.
77 XMLParentNode::~XMLParentNode()
81 RemoveAndDeleteAllChildren();
// Copy constructor: copies the XMLChildNode base part and, when rObj has a
// child list, creates a fresh list and adds a newly allocated copy of every
// child of rObj.
86 XMLParentNode::XMLParentNode( const XMLParentNode
& rObj
)
87 : XMLChildNode( rObj
)
89 if( rObj
.m_pChildList
)
91 m_pChildList
=new XMLChildNodeList();
92 for ( size_t i
= 0; i
< rObj
.m_pChildList
->size(); i
++ )
94 XMLChildNode
* pNode
= (*rObj
.m_pChildList
)[ i
];
// Clone through the concrete class that matches the reported node type.
97 switch(pNode
->GetNodeType())
99 case XML_NODE_TYPE_ELEMENT
:
100 AddChild( new XMLElement( *static_cast<XMLElement
* >(pNode
) ) ); break;
101 case XML_NODE_TYPE_DATA
:
102 AddChild( new XMLData ( *static_cast<XMLData
* > (pNode
) ) ); break;
103 case XML_NODE_TYPE_COMMENT
:
104 AddChild( new XMLComment( *static_cast<XMLComment
* >(pNode
) ) ); break;
105 case XML_NODE_TYPE_DEFAULT
:
106 AddChild( new XMLDefault( *static_cast<XMLDefault
* >(pNode
) ) ); break;
// Any other node type is only reported on stdout; no child is added for it.
107 default: fprintf(stdout
,"XMLParentNode::XMLParentNode( const XMLParentNode& rObj) strange obj");
// Copy assignment: assigns the XMLChildNode base part, removes and deletes the
// current children, then rebuilds the child list from rObj.
// NOTE(review): unlike the copy constructor, the loop below passes the child
// pointers of rObj straight to AddChild instead of cloning them, so both
// parents would reference the same child objects. Since
// RemoveAndDeleteAllChildren deletes the children, this looks like a double
// delete risk - confirm against the full file.
115 XMLParentNode
& XMLParentNode::operator=(const XMLParentNode
& rObj
)
119 XMLChildNode::operator=(rObj
);
122 RemoveAndDeleteAllChildren();
126 if( rObj
.m_pChildList
)
128 m_pChildList
=new XMLChildNodeList();
129 for ( size_t i
= 0; i
< rObj
.m_pChildList
->size(); i
++ )
130 AddChild( (*rObj
.m_pChildList
)[ i
] );
// Appends pChild to the child list of this node.
// A new XMLChildNodeList is allocated first; presumably only when no list
// exists yet (the guarding condition is not visible in this excerpt).
138 void XMLParentNode::AddChild( XMLChildNode
*pChild
)
141 m_pChildList
= new XMLChildNodeList();
142 m_pChildList
->push_back( pChild
);
// Deletes every child object held in m_pChildList and then empties the list.
// The list object itself is kept.
// NOTE(review): a null check on m_pChildList is not visible in this excerpt.
145 void XMLParentNode::RemoveAndDeleteAllChildren()
149 for ( size_t i
= 0; i
< m_pChildList
->size(); i
++ )
150 delete (*m_pChildList
)[ i
];
151 m_pChildList
->clear();
// Writes this XML file to the path aFilename.
// The visible lines open the target with out | trunc, print an error message
// naming the file, and terminate the process with EXIT_FAILURE.
// NOTE(review): the stream declaration, the failure condition and the call
// that performs the actual write are not visible here; presumably the error
// path runs only when the stream could not be opened.
159 void XMLFile::Write( OString
const &aFilename
)
162 aFilename
.getStr(), std::ios_base::out
| std::ios_base::trunc
);
166 << "Error: helpex cannot create file " << aFilename
.getStr()
168 std::exit(EXIT_FAILURE
);
// Recursively serialises the node pCur to rStream, dispatching on its node
// type.
// NOTE(review): the literal markup written around names and values, the
// conditions and the return statements are not visible in this excerpt.
174 bool XMLFile::Write( ofstream
&rStream
, XMLNode
*pCur
)
// Restart on the file node itself; presumably taken when pCur is null.
177 Write( rStream
, this );
179 switch( pCur
->GetNodeType())
// File node: write every top-level child in order.
181 case XML_NODE_TYPE_FILE
:
184 for ( size_t i
= 0; i
< GetChildList()->size(); i
++ )
185 Write( rStream
, (*GetChildList())[ i
] );
// Element node: name, then attributes, then children, then the name again.
188 case XML_NODE_TYPE_ELEMENT
:
190 XMLElement
*pElement
= static_cast<XMLElement
*>(pCur
);
192 rStream
<< pElement
->GetName().getStr();
193 if ( pElement
->GetAttributeList())
194 for ( size_t j
= 0; j
< pElement
->GetAttributeList()->size(); j
++ )
// Attribute name and value both pass through XMLUtil::QuotHTML.
197 OString
sData( (*pElement
->GetAttributeList())[ j
]->GetName() );
198 rStream
<< XMLUtil::QuotHTML( sData
).getStr();
200 sData
= (*pElement
->GetAttributeList())[ j
]->GetValue();
201 rStream
<< XMLUtil::QuotHTML( sData
).getStr();
204 if ( !pElement
->GetChildList())
209 for ( size_t k
= 0; k
< pElement
->GetChildList()->size(); k
++ )
210 Write( rStream
, (*pElement
->GetChildList())[ k
] );
212 rStream
<< pElement
->GetName().getStr();
// Data node: text is quoted with XMLUtil::QuotHTML before it is written.
217 case XML_NODE_TYPE_DATA
:
219 OString
sData( static_cast<const XMLData
*>(pCur
)->GetData());
220 rStream
<< XMLUtil::QuotHTML( sData
).getStr();
// Comment node: the comment text is written without quoting.
223 case XML_NODE_TYPE_COMMENT
:
225 const XMLComment
*pComment
= static_cast<const XMLComment
*>(pCur
);
227 rStream
<< pComment
->GetComment().getStr();
// Default node: the stored text is written without quoting.
231 case XML_NODE_TYPE_DEFAULT
:
233 const XMLDefault
*pDefault
= static_cast<const XMLDefault
*>(pCur
);
234 rStream
<< pDefault
->GetDefault().getStr();
// Debug dump: prints the node pCur and its subtree to stdout, dispatching on
// the node type. nLevel is increased by one for the children of an element.
// NOTE(review): handling of a null pCur is not visible in this excerpt.
242 void XMLFile::Print( XMLNode
*pCur
, sal_uInt16 nLevel
)
248 switch( pCur
->GetNodeType())
// File node: print every top-level child.
250 case XML_NODE_TYPE_FILE
:
253 for ( size_t i
= 0; i
< GetChildList()->size(); i
++ )
254 Print( (*GetChildList())[ i
] );
257 case XML_NODE_TYPE_ELEMENT
:
259 XMLElement
*pElement
= static_cast<XMLElement
*>(pCur
);
261 fprintf( stdout
, "<%s", pElement
->GetName().getStr());
262 if ( pElement
->GetAttributeList())
264 for (size_t j
= 0; j
< pElement
->GetAttributeList()->size(); ++j
)
266 const OString
aAttrName((*pElement
->GetAttributeList())[j
]->GetName());
// Attributes named XML_LANG (compared ignoring ASCII case) are left out.
267 if (!aAttrName
.equalsIgnoreAsciiCase(XML_LANG
))
269 fprintf( stdout
, " %s=\"%s\"",
271 (*pElement
->GetAttributeList())[ j
]->GetValue().getStr());
// Without a child list the element is printed as an empty-element tag.
275 if ( !pElement
->GetChildList())
276 fprintf( stdout
, "/>" );
279 fprintf( stdout
, ">" );
280 for ( size_t k
= 0; k
< pElement
->GetChildList()->size(); k
++ )
281 Print( (*pElement
->GetChildList())[ k
], nLevel
+ 1 );
282 fprintf( stdout
, "</%s>", pElement
->GetName().getStr());
286 case XML_NODE_TYPE_DATA
:
288 const XMLData
*pData
= static_cast<const XMLData
*>(pCur
);
289 fprintf( stdout
, "%s", pData
->GetData().getStr());
292 case XML_NODE_TYPE_COMMENT
:
294 const XMLComment
*pComment
= static_cast<const XMLComment
*>(pCur
);
295 fprintf( stdout
, "<!--%s-->", pComment
->GetComment().getStr());
298 case XML_NODE_TYPE_DEFAULT
:
300 const XMLDefault
*pDefault
= static_cast<const XMLDefault
*>(pCur
);
301 fprintf( stdout
, "%s", pDefault
->GetDefault().getStr());
// NOTE(review): the signature line of this definition is not visible in this
// excerpt; presumably this is the body of the XMLFile destructor - confirm.
// Deletes every mapped value stored in m_pXMLStrings, then the map itself,
// and resets the pointer to NULL.
309 if( m_pXMLStrings
!= NULL
)
311 XMLHashMap::iterator pos
= m_pXMLStrings
->begin();
312 for( ; pos
!= m_pXMLStrings
->end() ; ++pos
)
314 delete pos
->second
; // Check and delete content also ?
316 delete m_pXMLStrings
;
317 m_pXMLStrings
= NULL
;
// Constructs a parentless file node, stores rFileName and starts with no
// string map (m_pXMLStrings is NULL).
// The body registers the element names that are looked up later in
// m_aNodes_localize: bookmark, variable, paragraph, alt, caption, title, link.
320 XMLFile::XMLFile( const OString
&rFileName
) // the file name, empty if created from memory stream
321 : XMLParentNode( NULL
)
322 , m_sFileName( rFileName
)
323 , m_pXMLStrings( NULL
)
325 m_aNodes_localize
.insert( TagMap::value_type(OString("bookmark") , sal_True
) );
326 m_aNodes_localize
.insert( TagMap::value_type(OString("variable") , sal_True
) );
327 m_aNodes_localize
.insert( TagMap::value_type(OString("paragraph") , sal_True
) );
328 m_aNodes_localize
.insert( TagMap::value_type(OString("alt") , sal_True
) );
329 m_aNodes_localize
.insert( TagMap::value_type(OString("caption") , sal_True
) );
330 m_aNodes_localize
.insert( TagMap::value_type(OString("title") , sal_True
) );
331 m_aNodes_localize
.insert( TagMap::value_type(OString("link") , sal_True
) );
// Rebuilds the string map: drops the current m_pXMLStrings, allocates an empty
// XMLHashMap and fills it through SearchL10NElements.
// The search runs on this object, or on pCur when pCur reports the file node
// type; the condition that selects between the two is not visible here.
// NOTE(review): the visible delete frees only the outer map, not the values
// stored in it - possible leak, confirm against the full file.
334 void XMLFile::Extract( XMLFile
*pCur
)
337 delete m_pXMLStrings
; // Elements ?
339 m_pXMLStrings
= new XMLHashMap();
341 SearchL10NElements( this );
344 if( pCur
->GetNodeType() == XML_NODE_TYPE_FILE
)
346 SearchL10NElements(pCur
);
// Registers pElement in m_pXMLStrings under its id and language.
// Both keys are read from the attributes of the element. A new id gets a
// fresh LangHashMap and is appended to m_vOrder; for a known id the element
// is stored under its language, and an existing entry for that language is
// reported on stdout as a duplicate.
// NOTE(review): several conditions and the declaration of pElem are not
// visible in this excerpt.
351 void XMLFile::InsertL10NElement( XMLElement
* pElement
)
353 OString sId
, sLanguage("");
356 if( pElement
->GetAttributeList() != NULL
)
358 for ( size_t j
= 0; j
< pElement
->GetAttributeList()->size(); j
++ )
360 const OString
sTempStr((*pElement
->GetAttributeList())[ j
]->GetName());
361 // Get the "id" Attribute
364 sId
= (*pElement
->GetAttributeList())[ j
]->GetValue();
366 // Get the "xml-lang" Attribute
367 if (sTempStr
== XML_LANG
)
369 sLanguage
= (*pElement
->GetAttributeList())[j
]->GetValue();
// Diagnostics for an element that has no attribute list.
376 fprintf(stdout
,"XMLFile::InsertL10NElement: No AttributeList found");
377 fprintf(stdout
,"++++++++++++++++++++++++++++++++++++++++++++++++++");
378 Print( pElement
, 0 );
379 fprintf(stdout
,"++++++++++++++++++++++++++++++++++++++++++++++++++");
382 XMLHashMap::iterator pos
= m_pXMLStrings
->find( sId
);
383 if( pos
== m_pXMLStrings
->end() ) // No instance yet, create a new one
385 pElem
= new LangHashMap();
386 (*pElem
)[ sLanguage
]=pElement
;
387 m_pXMLStrings
->insert( XMLHashMap::value_type( sId
, pElem
) );
// m_vOrder records each id once, in the order of first insertion.
388 m_vOrder
.push_back( sId
);
390 else // Already there
393 if ( (*pElem
)[ sLanguage
] )
395 fprintf(stdout
,"Error: Duplicated entry. ID = %s LANG = %s in File %s\n", sId
.getStr(), sLanguage
.getStr(), m_sFileName
.getStr() );
398 (*pElem
)[ sLanguage
]=pElement
;
// Copy constructor: copies the XMLParentNode base part, the file name, the
// table of localizable element names and the id order vector.
// NOTE(review): the handling of m_pXMLStrings is not visible in this excerpt.
402 XMLFile::XMLFile( const XMLFile
& rObj
)
403 : XMLParentNode( rObj
)
404 , m_sFileName( rObj
.m_sFileName
)
409 m_aNodes_localize
= rObj
.m_aNodes_localize
;
410 m_vOrder
= rObj
.m_vOrder
;
// Copy assignment: assigns the base part, copies m_aNodes_localize and
// m_vOrder, drops the current string map and rebuilds it from rObj.
// Every LangHashMap of rObj is duplicated and every XMLElement stored in it is
// copy-constructed, so the new map does not share element objects with rObj.
// NOTE(review): the visible delete frees only the outer map of the old
// content; a self-assignment guard and the return statement are not visible.
414 XMLFile
& XMLFile::operator=(const XMLFile
& rObj
)
418 XMLParentNode::operator=(rObj
);
420 m_aNodes_localize
= rObj
.m_aNodes_localize
;
421 m_vOrder
= rObj
.m_vOrder
;
424 delete m_pXMLStrings
;
426 if( rObj
.m_pXMLStrings
)
428 m_pXMLStrings
= new XMLHashMap();
429 for( XMLHashMap::iterator pos
= rObj
.m_pXMLStrings
->begin() ; pos
!= rObj
.m_pXMLStrings
->end() ; ++pos
)
431 LangHashMap
* pElem
=pos
->second
;
432 LangHashMap
* pNewelem
= new LangHashMap();
433 for(LangHashMap::iterator pos2
=pElem
->begin(); pos2
!=pElem
->end();++pos2
)
435 (*pNewelem
)[ pos2
->first
] = new XMLElement( *pos2
->second
);
437 (*m_pXMLStrings
)[ pos
->first
] = pNewelem
;
// Walks the tree below pCur and hands localizable elements to
// InsertL10NElement. nPos is the index of pCur within the child list of its
// parent and is stored on elements via SetPos.
// NOTE(review): the declaration of bInsert, the body of the localize check
// and the condition guarding the id value are not visible in this excerpt.
444 void XMLFile::SearchL10NElements( XMLChildNode
*pCur
, int nPos
)
// Restart on the file node itself; presumably taken when pCur is null.
448 SearchL10NElements( this );
451 switch( pCur
->GetNodeType())
// File node: descend into element children, passing their index as position.
453 case XML_NODE_TYPE_FILE
:
455 XMLChildNode
* pElement
;
458 for ( size_t i
= 0; i
< GetChildList()->size(); i
++ )
460 pElement
= (*GetChildList())[ i
];
461 if( pElement
->GetNodeType() == XML_NODE_TYPE_ELEMENT
)
462 SearchL10NElements( pElement
, i
);
467 case XML_NODE_TYPE_ELEMENT
:
469 XMLElement
*pElement
= static_cast<XMLElement
*>(pCur
);
// The element name is lower-cased before the lookup in m_aNodes_localize.
470 const OString
sName(pElement
->GetName().toAsciiLowerCase());
471 OString sLanguage
, sTmpStrVal
, sOldref
;
472 if ( pElement
->GetAttributeList())
// The attribute scan stops early once bInsert is false.
474 for ( size_t j
= 0 , cnt
= pElement
->GetAttributeList()->size(); j
< cnt
&& bInsert
; ++j
)
476 const OString sTmpStr
= (*pElement
->GetAttributeList())[j
]->GetName();
479 sTmpStrVal
=(*pElement
->GetAttributeList())[ j
]->GetValue();
481 if (sTmpStr
== "localize")
485 if (sTmpStr
== XML_LANG
) // Get the "xml-lang" Attribute
487 sLanguage
=(*pElement
->GetAttributeList())[ j
]->GetValue();
489 if (sTmpStr
== OLDREF
) // Get the "oldref" Attribute
491 sOldref
=(*pElement
->GetAttributeList())[ j
]->GetValue();
// Cache the collected attribute values and the position on the element.
494 pElement
->SetLanguageId( sLanguage
);
495 pElement
->SetId( sTmpStrVal
);
496 pElement
->SetOldRef( sOldref
);
497 pElement
->SetPos( nPos
);
// A registered element name is inserted as a whole; other elements are
// searched recursively through their children.
500 if ( bInsert
&& ( m_aNodes_localize
.find( sName
) != m_aNodes_localize
.end() ) )
501 InsertL10NElement(pElement
);
502 else if ( bInsert
&& pElement
->GetChildList() )
504 for ( size_t k
= 0; k
< pElement
->GetChildList()->size(); k
++ )
505 SearchL10NElements( (*pElement
->GetChildList())[ k
], k
);
// No handling is visible for data, comment and default nodes.
509 case XML_NODE_TYPE_DATA
:
511 case XML_NODE_TYPE_COMMENT
:
513 case XML_NODE_TYPE_DEFAULT
:
// Walks the tree below pCur looking for an element named TOPIC (ASCII case
// ignored). If its STATUS attribute is neither PUBLISH nor DEPRECATED the
// result flag is set to false. Returns the flag.
// NOTE(review): bStatusExport is a function-local static, so a false result
// would persist into later calls unless it is reset in lines that are not
// visible here - confirm. The declaration of bInsert is not visible either.
519 bool XMLFile::CheckExportStatus( XMLParentNode
*pCur
)
521 static bool bStatusExport
= true;
// Restart on the file node itself; presumably taken when pCur is null.
525 CheckExportStatus( this );
527 switch( pCur
->GetNodeType())
529 case XML_NODE_TYPE_FILE
:
533 for ( size_t i
= 0; i
< GetChildList()->size(); i
++ )
535 XMLParentNode
* pElement
= static_cast<XMLParentNode
*>((*GetChildList())[ i
]);
536 if( pElement
->GetNodeType() == XML_NODE_TYPE_ELEMENT
) CheckExportStatus( pElement
);//, i);
541 case XML_NODE_TYPE_ELEMENT
:
543 XMLElement
*pElement
= static_cast<XMLElement
*>(pCur
);
544 if (pElement
->GetName().equalsIgnoreAsciiCase("TOPIC"))
546 if ( pElement
->GetAttributeList())
548 for (size_t j
= 0 , cnt
= pElement
->GetAttributeList()->size(); j
< cnt
&& bInsert
; ++j
)
550 const OString
tmpStr((*pElement
->GetAttributeList())[j
]->GetName());
551 if (tmpStr
.equalsIgnoreAsciiCase("STATUS"))
553 const OString
tmpStrVal((*pElement
->GetAttributeList())[j
]->GetValue());
554 if (!tmpStrVal
.equalsIgnoreAsciiCase("PUBLISH") &&
555 !tmpStrVal
.equalsIgnoreAsciiCase("DEPRECATED"))
557 bStatusExport
= false;
// Elements other than TOPIC are searched through their children.
// NOTE(review): every child is cast to XMLParentNode* here without a visible
// node type check - confirm this is safe for data and comment children.
564 else if ( pElement
->GetChildList() )
566 for (size_t k
= 0; k
< pElement
->GetChildList()->size(); ++k
)
567 CheckExportStatus( static_cast<XMLParentNode
*>((*pElement
->GetChildList())[k
]) );
573 return bStatusExport
;
// Constructs an element named rName below pParent. The element starts without
// an attribute list and with empty project, file name, old reference,
// resource type and language id strings.
// NOTE(review): the initialisers of m_sId and m_nPos are not visible in this
// excerpt.
576 XMLElement::XMLElement(
577 const OString
&rName
, // the element name
578 XMLParentNode
*pParent
// parent node of this element
580 : XMLParentNode( pParent
)
581 , m_sElementName( rName
)
582 , m_pAttributes( NULL
)
583 , m_sProject(OString())
584 , m_sFilename(OString())
586 , m_sOldRef(OString())
587 , m_sResourceType(OString())
588 , m_sLanguageId(OString())
// Copy constructor: copies the XMLParentNode base part and all scalar members
// of rObj, then rebuilds the attribute list by adding a new attribute for
// every name and value pair of rObj.
// NOTE(review): the initialiser of m_pAttributes is not visible in this
// excerpt.
593 XMLElement::XMLElement(const XMLElement
& rObj
)
594 : XMLParentNode( rObj
)
595 , m_sElementName( rObj
.m_sElementName
)
597 , m_sProject( rObj
.m_sProject
)
598 , m_sFilename( rObj
.m_sFilename
)
599 , m_sId( rObj
.m_sId
)
600 , m_sOldRef( rObj
.m_sOldRef
)
601 , m_sResourceType( rObj
.m_sResourceType
)
602 , m_sLanguageId( rObj
.m_sLanguageId
)
603 , m_nPos( rObj
.m_nPos
)
605 if ( rObj
.m_pAttributes
)
607 m_pAttributes
= new XMLAttributeList();
608 for ( size_t i
= 0; i
< rObj
.m_pAttributes
->size(); i
++ )
609 AddAttribute( (*rObj
.m_pAttributes
)[ i
]->GetName(), (*rObj
.m_pAttributes
)[ i
]->GetValue() );
// Copy assignment: assigns the base part and the scalar members, deletes the
// current attributes together with their list, then rebuilds the list from
// the name and value pairs of rObj.
// NOTE(review): a self-assignment guard, the null check before the delete
// loop, the m_sId assignment and the return statement are not visible here.
613 XMLElement
& XMLElement::operator=(const XMLElement
& rObj
)
617 XMLParentNode::operator=(rObj
);
618 m_sElementName
= rObj
.m_sElementName
;
619 m_sProject
= rObj
.m_sProject
;
620 m_sFilename
= rObj
.m_sFilename
;
622 m_sOldRef
= rObj
.m_sOldRef
;
623 m_sResourceType
= rObj
.m_sResourceType
;
624 m_sLanguageId
= rObj
.m_sLanguageId
;
625 m_nPos
= rObj
.m_nPos
;
// Release the attributes owned so far.
629 for ( size_t i
= 0; i
< m_pAttributes
->size(); i
++ )
630 delete (*m_pAttributes
)[ i
];
631 delete m_pAttributes
;
633 if ( rObj
.m_pAttributes
)
635 m_pAttributes
= new XMLAttributeList();
636 for ( size_t i
= 0; i
< rObj
.m_pAttributes
->size(); i
++ )
637 AddAttribute( (*rObj
.m_pAttributes
)[ i
]->GetName(), (*rObj
.m_pAttributes
)[ i
]->GetValue() );
// Appends a newly allocated XMLAttribute built from rAttribute and rValue.
// The attribute list is created on first use.
643 void XMLElement::AddAttribute( const OString
&rAttribute
, const OString
&rValue
)
645 if ( !m_pAttributes
)
646 m_pAttributes
= new XMLAttributeList();
647 m_pAttributes
->push_back( new XMLAttribute( rAttribute
, rValue
) );
// Sets the language of this element to rValue: updates the cached language id,
// rewrites the value of every attribute named xml-lang, and applies the same
// change recursively to all child nodes of element type.
// NOTE(review): null checks on m_pAttributes and pCList are not visible in
// this excerpt.
650 void XMLElement::ChangeLanguageTag( const OString
&rValue
)
652 SetLanguageId(rValue
);
655 for (size_t i
= 0; i
< m_pAttributes
->size(); ++i
)
657 if ( (*m_pAttributes
)[ i
]->GetName() == "xml-lang" )
658 (*m_pAttributes
)[ i
]->setValue(rValue
);
661 XMLChildNodeList
* pCList
= GetChildList();
665 for ( size_t i
= 0; i
< pCList
->size(); i
++ )
667 XMLChildNode
* pNode
= (*pCList
)[ i
];
668 if( pNode
&& pNode
->GetNodeType() == XML_NODE_TYPE_ELEMENT
)
670 XMLElement
* pElem
= static_cast< XMLElement
* >(pNode
);
// The recursive call already starts with SetLanguageId(rValue), so the
// second call below repeats that assignment.
671 pElem
->ChangeLanguageTag( rValue
);
672 pElem
->SetLanguageId(rValue
);
// Destructor: deletes every attribute object, then the attribute list, and
// resets the pointer to NULL.
// NOTE(review): the null check on m_pAttributes is not visible in this
// excerpt.
681 XMLElement::~XMLElement()
685 for ( size_t i
= 0; i
< m_pAttributes
->size(); i
++ )
686 delete (*m_pAttributes
)[ i
];
688 delete m_pAttributes
;
689 m_pAttributes
= NULL
;
// Returns the text produced by Print for this element. Print is called with
// bRootelement set to true and writes into a local buffer that is then
// converted to an OString.
693 OString
XMLElement::ToOString()
695 OStringBuffer sBuffer
;
696 Print(this,sBuffer
,true);
697 return sBuffer
.makeStringAndClear();
// Appends a textual form of the node pCur to rBuffer.
// The first visible part prints the children of pCur recursively with
// bRootelement set to false; presumably this is the bRootelement branch that
// skips the tag of the root itself (the condition is not visible here).
// The switch below prints a node including its own markup.
700 void XMLElement::Print(XMLNode
*pCur
, OStringBuffer
& rBuffer
, bool bRootelement
) const
706 XMLElement
*pElement
= static_cast<XMLElement
*>(pCur
);
707 if ( pElement
->GetAttributeList())
709 if ( pElement
->GetChildList())
711 for ( size_t k
= 0; k
< pElement
->GetChildList()->size(); k
++ )
713 XMLChildNode
* pTmp
= (*pElement
->GetChildList())[ k
];
714 Print( pTmp
, rBuffer
, false);
721 switch( pCur
->GetNodeType())
723 case XML_NODE_TYPE_ELEMENT
:
725 XMLElement
*pElement
= static_cast<XMLElement
*>(pCur
);
// Elements named comment (ASCII case ignored) are not printed by this branch.
727 if( !pElement
->GetName().equalsIgnoreAsciiCase("comment") )
729 rBuffer
.append( "<" );
730 rBuffer
.append( pElement
->GetName() );
731 if ( pElement
->GetAttributeList())
733 for ( size_t j
= 0; j
< pElement
->GetAttributeList()->size(); j
++ )
735 const OString
aAttrName( (*pElement
->GetAttributeList())[ j
]->GetName() );
// The xml-lang attribute is left out of the printed tag.
736 if( !aAttrName
.equalsIgnoreAsciiCase( "xml-lang" ) )
739 " " + aAttrName
+ "=\"" +
740 (*pElement
->GetAttributeList())[ j
]->GetValue() + "\"" );
// Without a child list the element is printed as an empty-element tag.
744 if ( !pElement
->GetChildList())
745 rBuffer
.append( "/>" );
748 rBuffer
.append( ">" );
749 for ( size_t k
= 0; k
< pElement
->GetChildList()->size(); k
++ )
751 XMLChildNode
* pTmp
= (*pElement
->GetChildList())[ k
];
752 Print( pTmp
, rBuffer
, false);
754 rBuffer
.append( "</" + pElement
->GetName() + ">" );
// Data and default text are appended unchanged; comments get their markers.
759 case XML_NODE_TYPE_DATA
:
761 const XMLData
*pData
= static_cast<const XMLData
*>(pCur
);
762 rBuffer
.append( pData
->GetData() );
765 case XML_NODE_TYPE_COMMENT
:
767 const XMLComment
*pComment
= static_cast<const XMLComment
*>(pCur
);
768 rBuffer
.append( "<!--" + pComment
->GetComment() + "-->" );
771 case XML_NODE_TYPE_DEFAULT
:
773 const XMLDefault
*pDefault
= static_cast<const XMLDefault
*>(pCur
);
774 rBuffer
.append( pDefault
->GetDefault() );
// Error report on stdout; judging by the message text this is the path for a
// null pCur (the condition is not visible here).
782 fprintf(stdout
,"\n#+------Error: NULL Pointer in XMLELement::Print------+#\n");
788 // class SimpleXMLParser
// Converts the system path rPathname (decoded as UTF-8) into an absolute file
// URL: system path to file URL, then resolved against the process working
// directory. Each failing step prints a message to std::cerr and terminates
// the process with EXIT_FAILURE.
// NOTE(review): the declarations of sUrl and sCwd and the return statement
// are not visible in this excerpt.
794 static OUString
lcl_pathnameToAbsoluteUrl(const OString
& rPathname
)
796 OUString sPath
= OStringToOUString(rPathname
, RTL_TEXTENCODING_UTF8
);
798 if (osl::FileBase::getFileURLFromSystemPath(sPath
, sUrl
)
799 != osl::FileBase::E_None
)
801 std::cerr
<< "Error: Cannot convert input pathname to URL\n";
802 std::exit(EXIT_FAILURE
);
805 if (osl_getProcessWorkingDir(&sCwd
.pData
) != osl_Process_E_None
)
807 std::cerr
<< "Error: Cannot determine cwd\n";
808 std::exit(EXIT_FAILURE
);
// sUrl is both the input and the output of the absolute URL conversion.
810 if (osl::FileBase::getAbsoluteFileURL(sCwd
, sUrl
, sUrl
)
811 != osl::FileBase::E_None
)
813 std::cerr
<< "Error: Cannot convert input URL to absolute URL\n";
814 std::exit(EXIT_FAILURE
);
// Creates the expat parser and wires it to this object: this is stored as
// user data and the static handler functions of this class are installed for
// element start and end, character data, comments and default content.
// NOTE(review): member initialisers are not visible in this excerpt, and the
// result of XML_ParserCreate is not checked in the visible lines.
821 SimpleXMLParser::SimpleXMLParser()
826 m_aParser
= XML_ParserCreate( NULL
);
827 XML_SetUserData( m_aParser
, this );
828 XML_SetElementHandler( m_aParser
, (XML_StartElementHandler
) StartElementHandler
, (XML_EndElementHandler
) EndElementHandler
);
829 XML_SetCharacterDataHandler( m_aParser
, (XML_CharacterDataHandler
) CharacterDataHandler
);
830 XML_SetCommentHandler( m_aParser
, (XML_CommentHandler
) CommentHandler
);
831 XML_SetDefaultHandler( m_aParser
, (XML_DefaultHandler
) DefaultHandler
);
// Destructor: frees the expat parser created in the constructor.
834 SimpleXMLParser::~SimpleXMLParser()
836 XML_ParserFree( m_aParser
);
// Handler installed for element starts: userData is cast back to the
// SimpleXMLParser instance and the call is forwarded to StartElement.
839 void SimpleXMLParser::StartElementHandler(
840 void *userData
, const XML_Char
*name
, const XML_Char
**atts
)
842 static_cast<SimpleXMLParser
*>(userData
)->StartElement( name
, atts
);
// Handler installed for element ends: userData is cast back to the
// SimpleXMLParser instance and the call is forwarded to EndElement.
845 void SimpleXMLParser::EndElementHandler(
846 void *userData
, const XML_Char
*name
)
848 static_cast<SimpleXMLParser
*>(userData
)->EndElement( name
);
// Handler installed for character data: userData is cast back to the
// SimpleXMLParser instance and the text s with its length len is forwarded to
// CharacterData.
851 void SimpleXMLParser::CharacterDataHandler(
852 void *userData
, const XML_Char
*s
, int len
)
854 static_cast<SimpleXMLParser
*>(userData
)->CharacterData( s
, len
);
// Handler installed for comments: userData is cast back to the
// SimpleXMLParser instance and the comment text is forwarded to Comment.
857 void SimpleXMLParser::CommentHandler(
858 void *userData
, const XML_Char
*data
)
860 static_cast<SimpleXMLParser
*>(userData
)->Comment( data
);
// Handler installed as the default handler: userData is cast back to the
// SimpleXMLParser instance and the text s with its length len is forwarded to
// Default.
863 void SimpleXMLParser::DefaultHandler(
864 void *userData
, const XML_Char
*s
, int len
)
866 static_cast<SimpleXMLParser
*>(userData
)->Default( s
, len
);
// Creates a new XMLElement named name below the current node, makes it the
// current node, and adds the entries of atts to it as attributes. atts is
// read in pairs: index i is the name, index i + 1 the value.
// NOTE(review): the loop header over atts and any reset of m_pCurData are not
// visible in this excerpt.
869 void SimpleXMLParser::StartElement(
870 const XML_Char
*name
, const XML_Char
**atts
)
872 XMLElement
*pElement
= new XMLElement( OString(name
), m_pCurNode
);
873 m_pCurNode
= pElement
;
879 pElement
->AddAttribute( atts
[ i
], atts
[ i
+ 1 ] );
// Closes the current element: the parent of the current node becomes the
// current node again. The element name argument is unused.
884 void SimpleXMLParser::EndElement( const XML_Char
* /*name*/ )
886 m_pCurNode
= m_pCurNode
->GetParent();
// Stores character data. The text passes through helper::UnQuotHTML and is
// either placed in a new XMLData node below the current node or appended to
// the data node referenced by m_pCurData.
// NOTE(review): the construction of x from s and len and the condition that
// chooses between the two paths are not visible here; presumably a new node
// is created when m_pCurData is not set.
890 void SimpleXMLParser::CharacterData( const XML_Char
*s
, int len
)
895 m_pCurData
= new XMLData( helper::UnQuotHTML(x
) , m_pCurNode
);
900 m_pCurData
->AddData( helper::UnQuotHTML(x
) );
// Creates an XMLComment holding the text data below the current node.
// The result of new is not stored here; presumably the node registers itself
// with m_pCurNode in its constructor and is owned by it - confirm.
905 void SimpleXMLParser::Comment( const XML_Char
*data
)
908 new XMLComment( OString( data
), m_pCurNode
);
// Creates an XMLDefault holding the len characters at s below the current
// node. The result of new is not stored here; presumably the node registers
// itself with m_pCurNode in its constructor and is owned by it - confirm.
911 void SimpleXMLParser::Default( const XML_Char
*s
, int len
)
914 new XMLDefault(OString( s
, len
), m_pCurNode
);
// Parses the file rFileName into the tree rooted at pXMLFileIn.
// Steps visible here: preset the error information with an open-failure
// message, turn the path into an absolute URL, open the file for reading,
// query its size and map it into memory, attach pXMLFileIn as target tree and
// current node, run XML_Parse over the mapped bytes as one final chunk, and
// unmap the file. When parsing fails, the error code, line and column are
// taken from the parser and a message is composed from them.
// NOTE(review): early returns, the closing of the file handle and the return
// value are not visible in this excerpt.
917 XMLFile
*SimpleXMLParser::Execute( const OString
&rFileName
, XMLFile
* pXMLFileIn
)
// Pessimistic default: this message stays in place if opening the file fails.
919 m_aErrorInformation
.m_eCode
= XML_ERROR_NONE
;
920 m_aErrorInformation
.m_nLine
= 0;
921 m_aErrorInformation
.m_nColumn
= 0;
922 m_aErrorInformation
.m_sMessage
= "ERROR: Unable to open file ";
923 m_aErrorInformation
.m_sMessage
+= rFileName
;
925 OUString
aFileURL(lcl_pathnameToAbsoluteUrl(rFileName
));
928 if (osl_openFile(aFileURL
.pData
, &h
, osl_File_OpenFlag_Read
)
935 oslFileError e
= osl_getFileSize(h
, &s
);
937 if (e
== osl_File_E_None
)
939 e
= osl_mapFile(h
, &p
, s
, 0, 0);
941 if (e
!= osl_File_E_None
)
947 m_pXMLFile
= pXMLFileIn
;
948 m_pXMLFile
->SetName( rFileName
);
950 m_pCurNode
= m_pXMLFile
;
// Optimistic default: replaced below when XML_Parse reports a failure.
953 m_aErrorInformation
.m_eCode
= XML_ERROR_NONE
;
954 m_aErrorInformation
.m_nLine
= 0;
955 m_aErrorInformation
.m_nColumn
= 0;
956 if ( !m_pXMLFile
->GetName().isEmpty())
958 m_aErrorInformation
.m_sMessage
= "File " + m_pXMLFile
->GetName() + " parsed successfully";
961 m_aErrorInformation
.m_sMessage
= "XML-File parsed successfully";
// The whole mapped buffer is handed to expat in a single final call.
963 if (!XML_Parse(m_aParser
, static_cast< char * >(p
), s
, true))
965 m_aErrorInformation
.m_eCode
= XML_GetErrorCode( m_aParser
);
966 m_aErrorInformation
.m_nLine
= XML_GetErrorLineNumber( m_aParser
);
967 m_aErrorInformation
.m_nColumn
= XML_GetErrorColumnNumber( m_aParser
);
969 m_aErrorInformation
.m_sMessage
= "ERROR: ";
970 if ( !m_pXMLFile
->GetName().isEmpty())
971 m_aErrorInformation
.m_sMessage
+= m_pXMLFile
->GetName();
973 m_aErrorInformation
.m_sMessage
+= OString( "XML-File (");
975 m_aErrorInformation
.m_sMessage
+=
976 OString::number(sal::static_int_cast
< sal_Int64
>(m_aErrorInformation
.m_nLine
)) + "," +
977 OString::number(sal::static_int_cast
< sal_Int64
>(m_aErrorInformation
.m_nColumn
)) + "): ";
// Append a short text for the expat error code.
// NOTE(review): one of the message strings below is misspelled (Dublicat);
// it is program output and is therefore left untouched here.
979 switch (m_aErrorInformation
.m_eCode
)
981 case XML_ERROR_NO_MEMORY
:
982 m_aErrorInformation
.m_sMessage
+= "No memory";
984 case XML_ERROR_SYNTAX
:
985 m_aErrorInformation
.m_sMessage
+= "Syntax";
987 case XML_ERROR_NO_ELEMENTS
:
988 m_aErrorInformation
.m_sMessage
+= "No elements";
990 case XML_ERROR_INVALID_TOKEN
:
991 m_aErrorInformation
.m_sMessage
+= "Invalid token";
993 case XML_ERROR_UNCLOSED_TOKEN
:
994 m_aErrorInformation
.m_sMessage
+= "Unclosed token";
996 case XML_ERROR_PARTIAL_CHAR
:
997 m_aErrorInformation
.m_sMessage
+= "Partial char";
999 case XML_ERROR_TAG_MISMATCH
:
1000 m_aErrorInformation
.m_sMessage
+= "Tag mismatch";
1002 case XML_ERROR_DUPLICATE_ATTRIBUTE
:
1003 m_aErrorInformation
.m_sMessage
+= "Dublicat attribute";
1005 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT
:
1006 m_aErrorInformation
.m_sMessage
+= "Junk after doc element";
1008 case XML_ERROR_PARAM_ENTITY_REF
:
1009 m_aErrorInformation
.m_sMessage
+= "Param entity ref";
1011 case XML_ERROR_UNDEFINED_ENTITY
:
1012 m_aErrorInformation
.m_sMessage
+= "Undefined entity";
1014 case XML_ERROR_RECURSIVE_ENTITY_REF
:
1015 m_aErrorInformation
.m_sMessage
+= "Recursive entity ref";
1017 case XML_ERROR_ASYNC_ENTITY
:
1018 m_aErrorInformation
.m_sMessage
+= "Async_entity";
1020 case XML_ERROR_BAD_CHAR_REF
:
1021 m_aErrorInformation
.m_sMessage
+= "Bad char ref";
1023 case XML_ERROR_BINARY_ENTITY_REF
:
1024 m_aErrorInformation
.m_sMessage
+= "Binary entity";
1026 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF
:
1027 m_aErrorInformation
.m_sMessage
+= "Attribute external entity ref";
1029 case XML_ERROR_MISPLACED_XML_PI
:
1030 m_aErrorInformation
.m_sMessage
+= "Misplaced xml pi";
1032 case XML_ERROR_UNKNOWN_ENCODING
:
1033 m_aErrorInformation
.m_sMessage
+= "Unknown encoding";
1035 case XML_ERROR_INCORRECT_ENCODING
:
1036 m_aErrorInformation
.m_sMessage
+= "Incorrect encoding";
1038 case XML_ERROR_UNCLOSED_CDATA_SECTION
:
1039 m_aErrorInformation
.m_sMessage
+= "Unclosed cdata section";
1041 case XML_ERROR_EXTERNAL_ENTITY_HANDLING
:
1042 m_aErrorInformation
.m_sMessage
+= "External entity handling";
1044 case XML_ERROR_NOT_STANDALONE
:
1045 m_aErrorInformation
.m_sMessage
+= "Not standalone";
1047 case XML_ERROR_NONE
:
1056 osl_unmapMappedFile(h
, p
, s
);
// Builds a copy of the characters of rString in the half-open index range
// from nStart to nEnd, replacing selected characters while copying; all other
// characters are appended unchanged. The asserts require nStart to be below
// nEnd and the range to lie within rString. bInsideTag is passed as true by
// the caller for text inside a recognised tag.
// NOTE(review): the per-character conditions are not visible in this excerpt.
// The string literals appended below look as if character entities were
// decoded when this text was extracted (one of them is not even a valid
// literal, and the startsWith call compares a one-character literal over a
// length of 5) - confirm the real literals against the repository.
1065 static icu::UnicodeString
lcl_QuotRange(
1066 const icu::UnicodeString
& rString
, const sal_Int32 nStart
,
1067 const sal_Int32 nEnd
, bool bInsideTag
= false )
1069 icu::UnicodeString sReturn
;
1070 assert( nStart
< nEnd
);
1071 assert( nStart
>= 0 );
1072 assert( nEnd
<= rString
.length() );
1073 for (sal_Int32 i
= nStart
; i
< nEnd
; ++i
)
1078 sReturn
.append("<");
1081 sReturn
.append(">");
1085 sReturn
.append(""");
1087 sReturn
.append(rString
[i
]);
1090 if (rString
.startsWith("&", i
, 5))
1091 sReturn
.append('&');
1093 sReturn
.append("&");
1096 sReturn
.append(rString
[i
]);
// Tests rString against a fixed table of 13 tag names: the string either
// starts with an opening angle bracket followed by a listed name, or equals
// the closing form of a listed name. The final return additionally accepts
// exactly the br and help-id-missing empty-element tags.
// NOTE(review): the statement executed on a table match is not visible in
// this excerpt; presumably it returns true.
1103 static bool lcl_isTag( const icu::UnicodeString
& rString
)
1105 static const int nSize
= 13;
1106 static const icu::UnicodeString vTags
[nSize
] = {
1107 "ahelp", "link", "item", "emph", "defaultinline",
1108 "switchinline", "caseinline", "variable",
1109 "bookmark_value", "image", "embedvar", "alt", "sup" };
1111 for( int nIndex
= 0; nIndex
< nSize
; ++nIndex
)
1113 if( rString
.startsWith("<" + vTags
[nIndex
]) ||
1114 rString
== "</" + vTags
[nIndex
] + ">" )
1118 return rString
== "<br/>" || rString
=="<help-id-missing/>";
1121 } /// anonymous namespace
// Quotes rString for output while leaving recognised tags in place.
// The UTF-8 input is converted to an ICU string and scanned with a regular
// expression for tag-like substrings. Text between matches is passed through
// lcl_QuotRange. A match accepted by lcl_isTag keeps its first and last
// character and only its inner part is quoted with bInsideTag set to true;
// the remaining call quotes a match as a whole, presumably in the else
// branch. The result is converted back to a UTF-8 OString.
// NOTE(review): the early return for blank input, the else keyword, the
// update of nStartPos at the end of the loop and the head of the final return
// expression are not visible in this excerpt. Errors reported through nIcuErr
// are only checked in the loop condition in the visible lines.
1123 OString
XMLUtil::QuotHTML( const OString
&rString
)
1125 if( rString
.trim().isEmpty() )
1127 UErrorCode nIcuErr
= U_ZERO_ERROR
;
1128 static const sal_uInt32 nSearchFlags
=
1129 UREGEX_DOTALL
| UREGEX_CASE_INSENSITIVE
;
1130 static const OUString
sPattern(
1131 "<[/]\?\?[a-z_-]+?(?:| +[a-z]+?=\".*?\") *[/]\?\?>");
1132 static const UnicodeString
sSearchPat(
1133 reinterpret_cast<const UChar
*>(sPattern
.getStr()),
1134 sPattern
.getLength() );
1136 const OUString sOUSource
= OStringToOUString(rString
, RTL_TEXTENCODING_UTF8
);
1137 icu::UnicodeString
sSource(
1138 reinterpret_cast<const UChar
*>(
1139 sOUSource
.getStr()), sOUSource
.getLength() );
1141 RegexMatcher
aRegexMatcher( sSearchPat
, nSearchFlags
, nIcuErr
);
1142 aRegexMatcher
.reset( sSource
);
1144 icu::UnicodeString sReturn
;
// nEndPos is the end of the previous match; nStartPos is where the next
// search begins.
1145 int32_t nEndPos
= 0;
1146 int32_t nStartPos
= 0;
1147 while( aRegexMatcher
.find(nStartPos
, nIcuErr
) && U_SUCCESS(nIcuErr
) )
1149 nStartPos
= aRegexMatcher
.start(nIcuErr
);
// Quote the plain text between the previous match and this one.
1150 if ( nEndPos
< nStartPos
)
1151 sReturn
.append(lcl_QuotRange(sSource
, nEndPos
, nStartPos
));
1152 nEndPos
= aRegexMatcher
.end(nIcuErr
);
1153 icu::UnicodeString sMatch
= aRegexMatcher
.group(nIcuErr
);
1154 if( lcl_isTag(sMatch
) )
1156 sReturn
.append("<");
1157 sReturn
.append(lcl_QuotRange(sSource
, nStartPos
+1, nEndPos
-1, true));
1158 sReturn
.append(">");
1161 sReturn
.append(lcl_QuotRange(sSource
, nStartPos
, nEndPos
));
// Quote whatever follows the last match.
1164 if( nEndPos
< sSource
.length() )
1165 sReturn
.append(lcl_QuotRange(sSource
, nEndPos
, sSource
.length()));
// A terminating NUL is appended because the OUString below is built from the
// raw buffer without an explicit length.
1166 sReturn
.append('\0');
1169 OUString(reinterpret_cast<const sal_Unicode
*>(sReturn
.getBuffer())),
1170 RTL_TEXTENCODING_UTF8
);
1173 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */