nss: upgrade to release 3.73
[LibreOffice.git] / l10ntools / source / xmlparse.cxx
blob54b9c6bf4d8eb0d5a73ef0b721db46dbb7dfe779
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
19 #include <sal/config.h>
21 #include <iterator> /* std::iterator*/
23 #include <cassert>
24 #include <stdio.h>
26 #include <helper.hxx>
27 #include <common.hxx>
28 #include <xmlparse.hxx>
29 #include <fstream>
30 #include <iostream>
31 #include <osl/thread.hxx>
32 #include <osl/process.h>
33 #include <rtl/strbuf.hxx>
34 #include <unicode/regex.h>
36 using namespace std;
37 using namespace osl;
39 #define XML_LANG "xml-lang"
40 #define ID "id"
45 XMLChildNode::XMLChildNode( XMLParentNode *pPar )
46 : m_pParent( pPar )
48 if ( m_pParent )
49 m_pParent->AddChild( this );
53 XMLChildNode::XMLChildNode( const XMLChildNode& rObj)
54 : XMLNode(rObj),
55 m_pParent(rObj.m_pParent)
59 XMLChildNode& XMLChildNode::operator=(const XMLChildNode& rObj)
61 if(this != &rObj)
63 m_pParent=rObj.m_pParent;
65 return *this;
71 XMLParentNode::~XMLParentNode()
73 if( m_pChildList )
75 RemoveAndDeleteAllChildren();
79 XMLParentNode::XMLParentNode( const XMLParentNode& rObj)
80 : XMLChildNode( rObj )
82 if( !rObj.m_pChildList )
83 return;
85 m_pChildList.reset( new XMLChildNodeList );
86 for ( size_t i = 0; i < rObj.m_pChildList->size(); i++ )
88 XMLChildNode* pNode = (*rObj.m_pChildList)[ i ];
89 if( pNode != nullptr)
91 switch(pNode->GetNodeType())
93 case XMLNodeType::ELEMENT:
94 AddChild( new XMLElement( *static_cast<XMLElement* >(pNode) ) ); break;
95 case XMLNodeType::DATA:
96 AddChild( new XMLData ( *static_cast<XMLData* > (pNode) ) ); break;
97 case XMLNodeType::COMMENT:
98 AddChild( new XMLComment( *static_cast<XMLComment* >(pNode) ) ); break;
99 case XMLNodeType::DEFAULT:
100 AddChild( new XMLDefault( *static_cast<XMLDefault* >(pNode) ) ); break;
101 default: fprintf(stdout,"XMLParentNode::XMLParentNode( const XMLParentNode& rObj) strange obj");
107 XMLParentNode& XMLParentNode::operator=(const XMLParentNode& rObj)
109 if(this!=&rObj)
111 XMLChildNode::operator=(rObj);
112 if( m_pChildList )
114 RemoveAndDeleteAllChildren();
116 if( rObj.m_pChildList )
118 m_pChildList.reset( new XMLChildNodeList );
119 for ( size_t i = 0; i < rObj.m_pChildList->size(); i++ )
120 AddChild( (*rObj.m_pChildList)[ i ] );
122 else
123 m_pChildList.reset();
126 return *this;
128 void XMLParentNode::AddChild( XMLChildNode *pChild )
130 if ( !m_pChildList )
131 m_pChildList.reset( new XMLChildNodeList );
132 m_pChildList->push_back( pChild );
135 void XMLParentNode::RemoveAndDeleteAllChildren()
137 if ( m_pChildList )
139 for ( size_t i = 0; i < m_pChildList->size(); i++ )
140 delete (*m_pChildList)[ i ];
141 m_pChildList->clear();
148 void XMLFile::Write( OString const &aFilename )
150 std::ofstream s(
151 aFilename.getStr(), std::ios_base::out | std::ios_base::trunc);
152 if (!s.is_open())
154 std::cerr
155 << "Error: helpex cannot create file " << aFilename
156 << '\n';
157 std::exit(EXIT_FAILURE);
159 Write(s);
160 s.close();
163 void XMLFile::Write( ofstream &rStream , XMLNode *pCur )
165 if ( !pCur )
166 Write( rStream, this );
167 else {
168 switch( pCur->GetNodeType())
170 case XMLNodeType::XFILE:
172 if( GetChildList())
173 for ( size_t i = 0; i < GetChildList()->size(); i++ )
174 Write( rStream, (*GetChildList())[ i ] );
176 break;
177 case XMLNodeType::ELEMENT:
179 XMLElement *pElement = static_cast<XMLElement*>(pCur);
180 rStream << "<";
181 rStream << pElement->GetName();
182 if ( pElement->GetAttributeList())
183 for ( size_t j = 0; j < pElement->GetAttributeList()->size(); j++ )
185 rStream << " ";
186 OString sData( (*pElement->GetAttributeList())[ j ]->GetName() );
187 rStream << XMLUtil::QuotHTML( sData );
188 rStream << "=\"";
189 sData = (*pElement->GetAttributeList())[ j ]->GetValue();
190 rStream << XMLUtil::QuotHTML( sData );
191 rStream << "\"";
193 if ( !pElement->GetChildList())
194 rStream << "/>";
195 else
197 rStream << ">";
198 for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ )
199 Write( rStream, (*pElement->GetChildList())[ k ] );
200 rStream << "</";
201 rStream << pElement->GetName();
202 rStream << ">";
205 break;
206 case XMLNodeType::DATA:
208 OString sData( static_cast<const XMLData*>(pCur)->GetData());
209 rStream << XMLUtil::QuotHTML( sData );
211 break;
212 case XMLNodeType::COMMENT:
214 const XMLComment *pComment = static_cast<const XMLComment*>(pCur);
215 rStream << "<!--";
216 rStream << pComment->GetComment();
217 rStream << "-->";
219 break;
220 case XMLNodeType::DEFAULT:
222 const XMLDefault *pDefault = static_cast<const XMLDefault*>(pCur);
223 rStream << pDefault->GetDefault();
225 break;
230 void XMLFile::Print( XMLNode *pCur, sal_uInt16 nLevel )
232 if ( !pCur )
233 Print( this );
234 else
236 switch( pCur->GetNodeType())
238 case XMLNodeType::XFILE:
240 if( GetChildList())
241 for ( size_t i = 0; i < GetChildList()->size(); i++ )
242 Print( (*GetChildList())[ i ] );
244 break;
245 case XMLNodeType::ELEMENT:
247 XMLElement *pElement = static_cast<XMLElement*>(pCur);
249 fprintf( stdout, "<%s", pElement->GetName().getStr());
250 if ( pElement->GetAttributeList())
252 for (size_t j = 0; j < pElement->GetAttributeList()->size(); ++j)
254 const OString aAttrName((*pElement->GetAttributeList())[j]->GetName());
255 if (!aAttrName.equalsIgnoreAsciiCase(XML_LANG))
257 fprintf( stdout, " %s=\"%s\"",
258 aAttrName.getStr(),
259 (*pElement->GetAttributeList())[ j ]->GetValue().getStr());
263 if ( !pElement->GetChildList())
264 fprintf( stdout, "/>" );
265 else
267 fprintf( stdout, ">" );
268 for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ )
269 Print( (*pElement->GetChildList())[ k ], nLevel + 1 );
270 fprintf( stdout, "</%s>", pElement->GetName().getStr());
273 break;
274 case XMLNodeType::DATA:
276 const XMLData *pData = static_cast<const XMLData*>(pCur);
277 fprintf( stdout, "%s", pData->GetData().getStr());
279 break;
280 case XMLNodeType::COMMENT:
282 const XMLComment *pComment = static_cast<const XMLComment*>(pCur);
283 fprintf( stdout, "<!--%s-->", pComment->GetComment().getStr());
285 break;
286 case XMLNodeType::DEFAULT:
288 const XMLDefault *pDefault = static_cast<const XMLDefault*>(pCur);
289 fprintf( stdout, "%s", pDefault->GetDefault().getStr());
291 break;
295 XMLFile::~XMLFile()
297 if( m_pXMLStrings )
299 for (auto const& pos : *m_pXMLStrings)
301 delete pos.second; // Check and delete content also ?
306 XMLFile::XMLFile( const OString &rFileName ) // the file name, empty if created from memory stream
307 : XMLParentNode( nullptr )
308 , m_sFileName( rFileName )
310 m_aNodes_localize.emplace( OString("bookmark") , true );
311 m_aNodes_localize.emplace( OString("variable") , true );
312 m_aNodes_localize.emplace( OString("paragraph") , true );
313 m_aNodes_localize.emplace( OString("h1") , true );
314 m_aNodes_localize.emplace( OString("h2") , true );
315 m_aNodes_localize.emplace( OString("h3") , true );
316 m_aNodes_localize.emplace( OString("h4") , true );
317 m_aNodes_localize.emplace( OString("h5") , true );
318 m_aNodes_localize.emplace( OString("h6") , true );
319 m_aNodes_localize.emplace( OString("note") , true );
320 m_aNodes_localize.emplace( OString("tip") , true );
321 m_aNodes_localize.emplace( OString("warning") , true );
322 m_aNodes_localize.emplace( OString("alt") , true );
323 m_aNodes_localize.emplace( OString("caption") , true );
324 m_aNodes_localize.emplace( OString("title") , true );
325 m_aNodes_localize.emplace( OString("link") , true );
328 void XMLFile::Extract()
330 m_pXMLStrings.reset( new XMLHashMap );
331 SearchL10NElements( this );
334 void XMLFile::InsertL10NElement( XMLElement* pElement )
336 OString sId, sLanguage("en-US");
337 LangHashMap* pElem;
339 if( pElement->GetAttributeList() != nullptr )
341 for ( size_t j = 0; j < pElement->GetAttributeList()->size(); j++ )
343 const OString sTempStr((*pElement->GetAttributeList())[ j ]->GetName());
344 // Get the "id" Attribute
345 if (sTempStr == ID)
347 sId = (*pElement->GetAttributeList())[ j ]->GetValue();
349 // Get the "xml-lang" Attribute
350 if (sTempStr == XML_LANG)
352 sLanguage = (*pElement->GetAttributeList())[j]->GetValue();
357 else
359 fprintf(stdout,"XMLFile::InsertL10NElement: No AttributeList found");
360 fprintf(stdout,"++++++++++++++++++++++++++++++++++++++++++++++++++");
361 Print( pElement );
362 fprintf(stdout,"++++++++++++++++++++++++++++++++++++++++++++++++++");
365 XMLHashMap::iterator pos = m_pXMLStrings->find( sId );
366 if( pos == m_pXMLStrings->end() ) // No instance, create new one
368 pElem = new LangHashMap;
369 (*pElem)[ sLanguage ]=pElement;
370 m_pXMLStrings->emplace( sId , pElem );
371 m_vOrder.push_back( sId );
373 else // Already there
375 pElem=pos->second;
376 if ( pElem->count(sLanguage) )
378 fprintf(stdout,"Error: Duplicated entry. ID = %s LANG = %s in File %s\n", sId.getStr(), sLanguage.getStr(), m_sFileName.getStr() );
379 exit( -1 );
381 (*pElem)[ sLanguage ]=pElement;
385 XMLFile::XMLFile( const XMLFile& rObj )
386 : XMLParentNode( rObj )
387 , m_sFileName( rObj.m_sFileName )
389 if( this != &rObj )
391 m_aNodes_localize = rObj.m_aNodes_localize;
392 m_vOrder = rObj.m_vOrder;
396 XMLFile& XMLFile::operator=(const XMLFile& rObj)
398 if( this != &rObj )
400 XMLParentNode::operator=(rObj);
402 m_aNodes_localize = rObj.m_aNodes_localize;
403 m_vOrder = rObj.m_vOrder;
405 m_pXMLStrings.reset();
407 if( rObj.m_pXMLStrings )
409 m_pXMLStrings.reset( new XMLHashMap );
410 for (auto const& pos : *rObj.m_pXMLStrings)
412 LangHashMap* pElem=pos.second;
413 LangHashMap* pNewelem = new LangHashMap;
414 for (auto const& pos2 : *pElem)
416 (*pNewelem)[ pos2.first ] = new XMLElement( *pos2.second );
418 (*m_pXMLStrings)[ pos.first ] = pNewelem;
422 return *this;
425 void XMLFile::SearchL10NElements( XMLChildNode *pCur )
427 if ( !pCur )
428 SearchL10NElements( this );
429 else
431 switch( pCur->GetNodeType())
433 case XMLNodeType::XFILE:
435 if( GetChildList())
437 for ( size_t i = 0; i < GetChildList()->size(); i++ )
439 XMLChildNode* pElement = (*GetChildList())[ i ];
440 if( pElement->GetNodeType() == XMLNodeType::ELEMENT )
441 SearchL10NElements( pElement );
445 break;
446 case XMLNodeType::ELEMENT:
448 bool bInsert = true;
449 XMLElement *pElement = static_cast<XMLElement*>(pCur);
450 const OString sName(pElement->GetName().toAsciiLowerCase());
451 OString sLanguage("en-US");
452 OString sTmpStrVal;
453 if ( pElement->GetAttributeList())
455 for ( size_t j = 0 , cnt = pElement->GetAttributeList()->size(); j < cnt && bInsert; ++j )
457 const OString sTmpStr = (*pElement->GetAttributeList())[j]->GetName();
458 if (sTmpStr == ID)
460 sTmpStrVal=(*pElement->GetAttributeList())[ j ]->GetValue();
462 if (sTmpStr == "localize")
464 bInsert=false;
466 if (sTmpStr == XML_LANG) // Get the "xml-lang" Attribute
468 sLanguage=(*pElement->GetAttributeList())[ j ]->GetValue();
473 if ( bInsert && ( m_aNodes_localize.find( sName ) != m_aNodes_localize.end() ) )
474 InsertL10NElement(pElement);
475 else if ( bInsert && pElement->GetChildList() )
477 for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ )
478 SearchL10NElements( (*pElement->GetChildList())[ k ] );
481 break;
482 default:
483 break;
488 bool XMLFile::CheckExportStatus( XMLParentNode *pCur )
490 static bool bStatusExport = true;
492 if ( !pCur )
493 CheckExportStatus( this );
494 else {
495 switch( pCur->GetNodeType())
497 case XMLNodeType::XFILE:
499 if( GetChildList())
501 for ( size_t i = 0; i < GetChildList()->size(); i++ )
503 XMLParentNode* pElement = static_cast<XMLParentNode*>((*GetChildList())[ i ]);
504 if( pElement->GetNodeType() == XMLNodeType::ELEMENT ) CheckExportStatus( pElement );//, i);
508 break;
509 case XMLNodeType::ELEMENT:
511 XMLElement *pElement = static_cast<XMLElement*>(pCur);
512 if (pElement->GetName().equalsIgnoreAsciiCase("TOPIC"))
514 if ( pElement->GetAttributeList())
516 for (size_t j = 0 , cnt = pElement->GetAttributeList()->size(); j < cnt; ++j)
518 const OString tmpStr((*pElement->GetAttributeList())[j]->GetName());
519 if (tmpStr.equalsIgnoreAsciiCase("STATUS"))
521 const OString tmpStrVal((*pElement->GetAttributeList())[j]->GetValue());
522 if (!tmpStrVal.equalsIgnoreAsciiCase("PUBLISH") &&
523 !tmpStrVal.equalsIgnoreAsciiCase("DEPRECATED"))
525 bStatusExport = false;
532 else if ( pElement->GetChildList() )
534 for (size_t k = 0; k < pElement->GetChildList()->size(); ++k)
535 CheckExportStatus( static_cast<XMLParentNode*>((*pElement->GetChildList())[k]) );
538 break;
539 default:
540 break;
543 return bStatusExport;
546 XMLElement::XMLElement(
547 const OString &rName, // the element name
548 XMLParentNode *pParent // parent node of this element
550 : XMLParentNode( pParent )
551 , m_sElementName( rName )
555 XMLElement::XMLElement(const XMLElement& rObj)
556 : XMLParentNode( rObj )
557 , m_sElementName( rObj.m_sElementName )
559 if ( rObj.m_pAttributes )
561 m_pAttributes.reset( new XMLAttributeList );
562 for ( size_t i = 0; i < rObj.m_pAttributes->size(); i++ )
563 AddAttribute( (*rObj.m_pAttributes)[ i ]->GetName(), (*rObj.m_pAttributes)[ i ]->GetValue() );
567 XMLElement& XMLElement::operator=(const XMLElement& rObj)
569 if( this !=& rObj )
571 XMLParentNode::operator=(rObj);
572 m_sElementName = rObj.m_sElementName;
574 if ( m_pAttributes )
576 for ( size_t i = 0; i < m_pAttributes->size(); i++ )
577 delete (*m_pAttributes)[ i ];
578 m_pAttributes.reset();
580 if ( rObj.m_pAttributes )
582 m_pAttributes.reset( new XMLAttributeList );
583 for ( size_t i = 0; i < rObj.m_pAttributes->size(); i++ )
584 AddAttribute( (*rObj.m_pAttributes)[ i ]->GetName(), (*rObj.m_pAttributes)[ i ]->GetValue() );
587 return *this;
590 void XMLElement::AddAttribute( const OString &rAttribute, const OString &rValue )
592 if ( !m_pAttributes )
593 m_pAttributes.reset( new XMLAttributeList );
594 m_pAttributes->push_back( new XMLAttribute( rAttribute, rValue ) );
597 void XMLElement::ChangeLanguageTag( const OString &rValue )
599 if ( m_pAttributes )
601 bool bWasSet = false;
602 for (size_t i = 0; i < m_pAttributes->size(); ++i)
604 if ((*m_pAttributes)[ i ]->GetName() == XML_LANG)
606 (*m_pAttributes)[ i ]->setValue(rValue);
607 bWasSet = true;
611 if (!bWasSet)
612 AddAttribute(XML_LANG, rValue);
614 XMLChildNodeList* pCList = GetChildList();
616 if( !pCList )
617 return;
619 for ( size_t i = 0; i < pCList->size(); i++ )
621 XMLChildNode* pNode = (*pCList)[ i ];
622 if( pNode && pNode->GetNodeType() == XMLNodeType::ELEMENT )
624 XMLElement* pElem = static_cast< XMLElement* >(pNode);
625 pElem->ChangeLanguageTag( rValue );
626 pElem = nullptr;
627 pNode = nullptr;
630 pCList = nullptr;
633 XMLElement::~XMLElement()
635 if ( m_pAttributes )
637 for ( size_t i = 0; i < m_pAttributes->size(); i++ )
638 delete (*m_pAttributes)[ i ];
642 OString XMLElement::ToOString()
644 OStringBuffer sBuffer;
645 Print(this,sBuffer,true);
646 return sBuffer.makeStringAndClear();
649 void XMLElement::Print(XMLNode *pCur, OStringBuffer& rBuffer, bool bRootelement ) const
651 if( pCur )
653 if( bRootelement )
655 XMLElement *pElement = static_cast<XMLElement*>(pCur);
656 if ( pElement->GetAttributeList())
658 if ( pElement->GetChildList())
660 for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ )
662 XMLChildNode* pTmp = (*pElement->GetChildList())[ k ];
663 Print( pTmp, rBuffer , false);
668 else
670 switch( pCur->GetNodeType())
672 case XMLNodeType::ELEMENT:
674 XMLElement *pElement = static_cast<XMLElement*>(pCur);
676 if( !pElement->GetName().equalsIgnoreAsciiCase("comment") )
678 rBuffer.append( "<" );
679 rBuffer.append( pElement->GetName() );
680 if ( pElement->GetAttributeList())
682 for ( size_t j = 0; j < pElement->GetAttributeList()->size(); j++ )
684 const OString aAttrName( (*pElement->GetAttributeList())[ j ]->GetName() );
685 if (!aAttrName.equalsIgnoreAsciiCase(XML_LANG))
687 rBuffer.append(
688 " " + aAttrName + "=\"" +
689 (*pElement->GetAttributeList())[ j ]->GetValue() + "\"" );
693 if ( !pElement->GetChildList())
694 rBuffer.append( "/>" );
695 else
697 rBuffer.append( ">" );
698 for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ )
700 XMLChildNode* pTmp = (*pElement->GetChildList())[ k ];
701 Print( pTmp, rBuffer , false);
703 rBuffer.append( "</" + pElement->GetName() + ">" );
707 break;
708 case XMLNodeType::DATA:
710 const XMLData *pData = static_cast<const XMLData*>(pCur);
711 rBuffer.append( pData->GetData() );
713 break;
714 case XMLNodeType::COMMENT:
716 const XMLComment *pComment = static_cast<const XMLComment*>(pCur);
717 rBuffer.append( "<!--" + pComment->GetComment() + "-->" );
719 break;
720 case XMLNodeType::DEFAULT:
722 const XMLDefault *pDefault = static_cast<const XMLDefault*>(pCur);
723 rBuffer.append( pDefault->GetDefault() );
725 break;
726 default:
727 break;
731 else
733 fprintf(stdout,"\n#+------Error: NULL Pointer in XMLELement::Print------+#\n");
734 return;
741 namespace
744 OUString lcl_pathnameToAbsoluteUrl(const OString& rPathname)
746 OUString sPath = OStringToOUString(rPathname, RTL_TEXTENCODING_UTF8 );
747 OUString sUrl;
748 if (osl::FileBase::getFileURLFromSystemPath(sPath, sUrl)
749 != osl::FileBase::E_None)
751 std::cerr << "Error: Cannot convert input pathname to URL\n";
752 std::exit(EXIT_FAILURE);
754 OUString sCwd;
755 if (osl_getProcessWorkingDir(&sCwd.pData) != osl_Process_E_None)
757 std::cerr << "Error: Cannot determine cwd\n";
758 std::exit(EXIT_FAILURE);
760 if (osl::FileBase::getAbsoluteFileURL(sCwd, sUrl, sUrl)
761 != osl::FileBase::E_None)
763 std::cerr << "Error: Cannot convert input URL to absolute URL\n";
764 std::exit(EXIT_FAILURE);
766 return sUrl;
771 SimpleXMLParser::SimpleXMLParser()
772 : m_pCurNode(nullptr)
773 , m_pCurData(nullptr)
775 m_aParser = XML_ParserCreate( nullptr );
776 XML_SetUserData( m_aParser, this );
777 XML_SetElementHandler( m_aParser, StartElementHandler, EndElementHandler );
778 XML_SetCharacterDataHandler( m_aParser, CharacterDataHandler );
779 XML_SetCommentHandler( m_aParser, CommentHandler );
780 XML_SetDefaultHandler( m_aParser, DefaultHandler );
783 SimpleXMLParser::~SimpleXMLParser()
785 XML_ParserFree( m_aParser );
788 void SimpleXMLParser::StartElementHandler(
789 void *userData, const XML_Char *name, const XML_Char **atts )
791 static_cast<SimpleXMLParser *>(userData)->StartElement( name, atts );
794 void SimpleXMLParser::EndElementHandler(
795 void *userData, const XML_Char * /*name*/ )
797 static_cast<SimpleXMLParser *>(userData)->EndElement();
800 void SimpleXMLParser::CharacterDataHandler(
801 void *userData, const XML_Char *s, int len )
803 static_cast<SimpleXMLParser *>(userData)->CharacterData( s, len );
806 void SimpleXMLParser::CommentHandler(
807 void *userData, const XML_Char *data )
809 static_cast<SimpleXMLParser *>(userData)->Comment( data );
812 void SimpleXMLParser::DefaultHandler(
813 void *userData, const XML_Char *s, int len )
815 static_cast<SimpleXMLParser *>(userData)->Default( s, len );
818 void SimpleXMLParser::StartElement(
819 const XML_Char *name, const XML_Char **atts )
821 XMLElement *pElement = new XMLElement( OString(name), m_pCurNode );
822 m_pCurNode = pElement;
823 m_pCurData = nullptr;
825 int i = 0;
826 while( atts[i] )
828 pElement->AddAttribute( atts[ i ], atts[ i + 1 ] );
829 i += 2;
833 void SimpleXMLParser::EndElement()
835 m_pCurNode = m_pCurNode->GetParent();
836 m_pCurData = nullptr;
839 void SimpleXMLParser::CharacterData( const XML_Char *s, int len )
841 if ( !m_pCurData )
843 OString x( s, len );
844 m_pCurData = new XMLData( helper::UnQuotHTML(x) , m_pCurNode );
846 else
848 OString x( s, len );
849 m_pCurData->AddData( helper::UnQuotHTML(x) );
854 void SimpleXMLParser::Comment( const XML_Char *data )
856 m_pCurData = nullptr;
857 new XMLComment( OString( data ), m_pCurNode );
860 void SimpleXMLParser::Default( const XML_Char *s, int len )
862 m_pCurData = nullptr;
863 new XMLDefault(OString( s, len ), m_pCurNode );
866 bool SimpleXMLParser::Execute( const OString &rFileName, XMLFile* pXMLFile )
868 m_aErrorInformation.m_eCode = XML_ERROR_NONE;
869 m_aErrorInformation.m_nLine = 0;
870 m_aErrorInformation.m_nColumn = 0;
871 m_aErrorInformation.m_sMessage = "ERROR: Unable to open file ";
872 m_aErrorInformation.m_sMessage += rFileName;
874 OUString aFileURL(lcl_pathnameToAbsoluteUrl(rFileName));
876 oslFileHandle h;
877 if (osl_openFile(aFileURL.pData, &h, osl_File_OpenFlag_Read)
878 != osl_File_E_None)
880 return false;
883 sal_uInt64 s;
884 oslFileError e = osl_getFileSize(h, &s);
885 void * p = nullptr;
886 if (e == osl_File_E_None)
888 e = osl_mapFile(h, &p, s, 0, 0);
890 if (e != osl_File_E_None)
892 osl_closeFile(h);
893 return false;
896 pXMLFile->SetName( rFileName );
898 m_pCurNode = pXMLFile;
899 m_pCurData = nullptr;
901 m_aErrorInformation.m_eCode = XML_ERROR_NONE;
902 m_aErrorInformation.m_nLine = 0;
903 m_aErrorInformation.m_nColumn = 0;
904 if ( !pXMLFile->GetName().isEmpty())
906 m_aErrorInformation.m_sMessage = "File " + pXMLFile->GetName() + " parsed successfully";
908 else
909 m_aErrorInformation.m_sMessage = "XML-File parsed successfully";
911 bool result = XML_Parse(m_aParser, static_cast< char * >(p), s, true);
912 if (!result)
914 m_aErrorInformation.m_eCode = XML_GetErrorCode( m_aParser );
915 m_aErrorInformation.m_nLine = XML_GetErrorLineNumber( m_aParser );
916 m_aErrorInformation.m_nColumn = XML_GetErrorColumnNumber( m_aParser );
918 m_aErrorInformation.m_sMessage = "ERROR: ";
919 if ( !pXMLFile->GetName().isEmpty())
920 m_aErrorInformation.m_sMessage += pXMLFile->GetName();
921 else
922 m_aErrorInformation.m_sMessage += "XML-File (";
924 m_aErrorInformation.m_sMessage +=
925 OString::number(sal::static_int_cast< sal_Int64 >(m_aErrorInformation.m_nLine)) + "," +
926 OString::number(sal::static_int_cast< sal_Int64 >(m_aErrorInformation.m_nColumn)) + "): ";
928 switch (m_aErrorInformation.m_eCode)
930 case XML_ERROR_NO_MEMORY:
931 m_aErrorInformation.m_sMessage += "No memory";
932 break;
933 case XML_ERROR_SYNTAX:
934 m_aErrorInformation.m_sMessage += "Syntax";
935 break;
936 case XML_ERROR_NO_ELEMENTS:
937 m_aErrorInformation.m_sMessage += "No elements";
938 break;
939 case XML_ERROR_INVALID_TOKEN:
940 m_aErrorInformation.m_sMessage += "Invalid token";
941 break;
942 case XML_ERROR_UNCLOSED_TOKEN:
943 m_aErrorInformation.m_sMessage += "Unclosed token";
944 break;
945 case XML_ERROR_PARTIAL_CHAR:
946 m_aErrorInformation.m_sMessage += "Partial char";
947 break;
948 case XML_ERROR_TAG_MISMATCH:
949 m_aErrorInformation.m_sMessage += "Tag mismatch";
950 break;
951 case XML_ERROR_DUPLICATE_ATTRIBUTE:
952 m_aErrorInformation.m_sMessage += "Duplicated attribute";
953 break;
954 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
955 m_aErrorInformation.m_sMessage += "Junk after doc element";
956 break;
957 case XML_ERROR_PARAM_ENTITY_REF:
958 m_aErrorInformation.m_sMessage += "Param entity ref";
959 break;
960 case XML_ERROR_UNDEFINED_ENTITY:
961 m_aErrorInformation.m_sMessage += "Undefined entity";
962 break;
963 case XML_ERROR_RECURSIVE_ENTITY_REF:
964 m_aErrorInformation.m_sMessage += "Recursive entity ref";
965 break;
966 case XML_ERROR_ASYNC_ENTITY:
967 m_aErrorInformation.m_sMessage += "Async_entity";
968 break;
969 case XML_ERROR_BAD_CHAR_REF:
970 m_aErrorInformation.m_sMessage += "Bad char ref";
971 break;
972 case XML_ERROR_BINARY_ENTITY_REF:
973 m_aErrorInformation.m_sMessage += "Binary entity";
974 break;
975 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
976 m_aErrorInformation.m_sMessage += "Attribute external entity ref";
977 break;
978 case XML_ERROR_MISPLACED_XML_PI:
979 m_aErrorInformation.m_sMessage += "Misplaced xml pi";
980 break;
981 case XML_ERROR_UNKNOWN_ENCODING:
982 m_aErrorInformation.m_sMessage += "Unknown encoding";
983 break;
984 case XML_ERROR_INCORRECT_ENCODING:
985 m_aErrorInformation.m_sMessage += "Incorrect encoding";
986 break;
987 case XML_ERROR_UNCLOSED_CDATA_SECTION:
988 m_aErrorInformation.m_sMessage += "Unclosed cdata section";
989 break;
990 case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
991 m_aErrorInformation.m_sMessage += "External entity handling";
992 break;
993 case XML_ERROR_NOT_STANDALONE:
994 m_aErrorInformation.m_sMessage += "Not standalone";
995 break;
996 case XML_ERROR_NONE:
997 break;
998 default:
999 break;
1003 osl_unmapMappedFile(h, p, s);
1004 osl_closeFile(h);
1006 return result;
1009 namespace
1012 icu::UnicodeString lcl_QuotRange(
1013 const icu::UnicodeString& rString, const sal_Int32 nStart,
1014 const sal_Int32 nEnd, bool bInsideTag = false )
1016 icu::UnicodeString sReturn;
1017 assert( nStart < nEnd );
1018 assert( nStart >= 0 );
1019 assert( nEnd <= rString.length() );
1020 for (sal_Int32 i = nStart; i < nEnd; ++i)
1022 switch (rString[i])
1024 case '<':
1025 sReturn.append("&lt;");
1026 break;
1027 case '>':
1028 sReturn.append("&gt;");
1029 break;
1030 case '"':
1031 if( !bInsideTag )
1032 sReturn.append("&quot;");
1033 else
1034 sReturn.append(rString[i]);
1035 break;
1036 case '&':
1037 if (rString.startsWith("&amp;", i, 5))
1038 sReturn.append('&');
1039 else
1040 sReturn.append("&amp;");
1041 break;
1042 default:
1043 sReturn.append(rString[i]);
1044 break;
1047 return sReturn;
1050 bool lcl_isTag( const icu::UnicodeString& rString )
1052 static const int nSize = 20;
1053 static const icu::UnicodeString vTags[nSize] = {
1054 "ahelp", "link", "item", "emph", "defaultinline",
1055 "switchinline", "caseinline", "variable",
1056 "bookmark_value", "image", "object",
1057 "embedvar", "alt", "sup", "sub",
1058 "menuitem", "keycode", "input", "literal", "widget"
1061 for( int nIndex = 0; nIndex < nSize; ++nIndex )
1063 if( rString.startsWith("<" + vTags[nIndex]) ||
1064 rString == "</" + vTags[nIndex] + ">" )
1065 return true;
1068 return rString == "<br/>" || rString =="<help-id-missing/>";
1071 } /// anonymous namespace
1073 OString XMLUtil::QuotHTML( const OString &rString )
1075 if( rString.trim().isEmpty() )
1076 return rString;
1077 UErrorCode nIcuErr = U_ZERO_ERROR;
1078 static const sal_uInt32 nSearchFlags =
1079 UREGEX_DOTALL | UREGEX_CASE_INSENSITIVE;
1080 static const icu::UnicodeString sSearchPat( "<[/]\?\?[a-z_-]+?(?:| +[a-z]+?=\".*?\") *[/]\?\?>" );
1082 const OUString sOUSource = OStringToOUString(rString, RTL_TEXTENCODING_UTF8);
1083 icu::UnicodeString sSource(
1084 reinterpret_cast<const UChar*>(
1085 sOUSource.getStr()), sOUSource.getLength() );
1087 icu::RegexMatcher aRegexMatcher( sSearchPat, nSearchFlags, nIcuErr );
1088 aRegexMatcher.reset( sSource );
1090 icu::UnicodeString sReturn;
1091 int32_t nEndPos = 0;
1092 int32_t nStartPos = 0;
1093 while( aRegexMatcher.find(nStartPos, nIcuErr) && U_SUCCESS(nIcuErr) )
1095 nStartPos = aRegexMatcher.start(nIcuErr);
1096 if ( nEndPos < nStartPos )
1097 sReturn.append(lcl_QuotRange(sSource, nEndPos, nStartPos));
1098 nEndPos = aRegexMatcher.end(nIcuErr);
1099 icu::UnicodeString sMatch = aRegexMatcher.group(nIcuErr);
1100 if( lcl_isTag(sMatch) )
1102 sReturn.append("<");
1103 sReturn.append(lcl_QuotRange(sSource, nStartPos+1, nEndPos-1, true));
1104 sReturn.append(">");
1106 else
1107 sReturn.append(lcl_QuotRange(sSource, nStartPos, nEndPos));
1108 nStartPos = nEndPos;
1110 if( nEndPos < sSource.length() )
1111 sReturn.append(lcl_QuotRange(sSource, nEndPos, sSource.length()));
1112 sReturn.append('\0');
1113 return
1114 OUStringToOString(
1115 OUString(reinterpret_cast<const sal_Unicode*>(sReturn.getBuffer())),
1116 RTL_TEXTENCODING_UTF8);
1119 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */