update emoji autocorrect entries from po-files
[LibreOffice.git] / l10ntools / source / xmlparse.cxx
blob7cd1db67dce30f939617848c9aa56b249aff9ae8
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
19 #include "sal/config.h"
21 #include <iterator> /* std::iterator*/
23 #include <cassert>
24 #include <stdio.h>
25 #include <sal/alloca.h>
27 #include "helper.hxx"
28 #include "common.hxx"
29 #include "xmlparse.hxx"
30 #include <fstream>
31 #include <iostream>
32 #include <osl/mutex.hxx>
33 #include <osl/thread.hxx>
34 #include <osl/process.h>
35 #include <rtl/strbuf.hxx>
36 #include <unicode/regex.h>
38 using namespace U_ICU_NAMESPACE;
39 using namespace std;
40 using namespace osl;
42 #define XML_LANG "xml-lang"
43 #define ID "id"
44 #define OLDREF "oldref"
47 // class XMLChildNode
50 XMLChildNode::XMLChildNode( XMLParentNode *pPar )
51 : m_pParent( pPar )
53 if ( m_pParent )
54 m_pParent->AddChild( this );
58 XMLChildNode::XMLChildNode( const XMLChildNode& rObj)
59 : XMLNode(rObj),
60 m_pParent(rObj.m_pParent)
64 XMLChildNode& XMLChildNode::operator=(const XMLChildNode& rObj)
66 if(this != &rObj)
68 m_pParent=rObj.m_pParent;
70 return *this;
74 // class XMLParentNode
77 XMLParentNode::~XMLParentNode()
79 if( m_pChildList )
81 RemoveAndDeleteAllChildren();
82 delete m_pChildList;
84 m_pChildList = NULL;
86 XMLParentNode::XMLParentNode( const XMLParentNode& rObj)
87 : XMLChildNode( rObj )
89 if( rObj.m_pChildList )
91 m_pChildList=new XMLChildNodeList();
92 for ( size_t i = 0; i < rObj.m_pChildList->size(); i++ )
94 XMLChildNode* pNode = (*rObj.m_pChildList)[ i ];
95 if( pNode != NULL)
97 switch(pNode->GetNodeType())
99 case XML_NODE_TYPE_ELEMENT:
100 AddChild( new XMLElement( *static_cast<XMLElement* >(pNode) ) ); break;
101 case XML_NODE_TYPE_DATA:
102 AddChild( new XMLData ( *static_cast<XMLData* > (pNode) ) ); break;
103 case XML_NODE_TYPE_COMMENT:
104 AddChild( new XMLComment( *static_cast<XMLComment* >(pNode) ) ); break;
105 case XML_NODE_TYPE_DEFAULT:
106 AddChild( new XMLDefault( *static_cast<XMLDefault* >(pNode) ) ); break;
107 default: fprintf(stdout,"XMLParentNode::XMLParentNode( const XMLParentNode& rObj) strange obj");
112 else
113 m_pChildList = NULL;
115 XMLParentNode& XMLParentNode::operator=(const XMLParentNode& rObj)
117 if(this!=&rObj)
119 XMLChildNode::operator=(rObj);
120 if( m_pChildList )
122 RemoveAndDeleteAllChildren();
123 delete m_pChildList;
124 m_pChildList = NULL;
126 if( rObj.m_pChildList )
128 m_pChildList=new XMLChildNodeList();
129 for ( size_t i = 0; i < rObj.m_pChildList->size(); i++ )
130 AddChild( (*rObj.m_pChildList)[ i ] );
132 else
133 m_pChildList = NULL;
136 return *this;
138 void XMLParentNode::AddChild( XMLChildNode *pChild )
140 if ( !m_pChildList )
141 m_pChildList = new XMLChildNodeList();
142 m_pChildList->push_back( pChild );
145 void XMLParentNode::RemoveAndDeleteAllChildren()
147 if ( m_pChildList )
149 for ( size_t i = 0; i < m_pChildList->size(); i++ )
150 delete (*m_pChildList)[ i ];
151 m_pChildList->clear();
156 // class XMLFile
159 void XMLFile::Write( OString const &aFilename )
161 std::ofstream s(
162 aFilename.getStr(), std::ios_base::out | std::ios_base::trunc);
163 if (!s.is_open())
165 std::cerr
166 << "Error: helpex cannot create file " << aFilename.getStr()
167 << '\n';
168 std::exit(EXIT_FAILURE);
170 Write(s);
171 s.close();
174 bool XMLFile::Write( ofstream &rStream , XMLNode *pCur )
176 if ( !pCur )
177 Write( rStream, this );
178 else {
179 switch( pCur->GetNodeType())
181 case XML_NODE_TYPE_FILE:
183 if( GetChildList())
184 for ( size_t i = 0; i < GetChildList()->size(); i++ )
185 Write( rStream, (*GetChildList())[ i ] );
187 break;
188 case XML_NODE_TYPE_ELEMENT:
190 XMLElement *pElement = static_cast<XMLElement*>(pCur);
191 rStream << "<";
192 rStream << pElement->GetName().getStr();
193 if ( pElement->GetAttributeList())
194 for ( size_t j = 0; j < pElement->GetAttributeList()->size(); j++ )
196 rStream << " ";
197 OString sData( (*pElement->GetAttributeList())[ j ]->GetName() );
198 rStream << XMLUtil::QuotHTML( sData ).getStr();
199 rStream << "=\"";
200 sData = (*pElement->GetAttributeList())[ j ]->GetValue();
201 rStream << XMLUtil::QuotHTML( sData ).getStr();
202 rStream << "\"";
204 if ( !pElement->GetChildList())
205 rStream << "/>";
206 else
208 rStream << ">";
209 for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ )
210 Write( rStream, (*pElement->GetChildList())[ k ] );
211 rStream << "</";
212 rStream << pElement->GetName().getStr();
213 rStream << ">";
216 break;
217 case XML_NODE_TYPE_DATA:
219 OString sData( static_cast<const XMLData*>(pCur)->GetData());
220 rStream << XMLUtil::QuotHTML( sData ).getStr();
222 break;
223 case XML_NODE_TYPE_COMMENT:
225 const XMLComment *pComment = static_cast<const XMLComment*>(pCur);
226 rStream << "<!--";
227 rStream << pComment->GetComment().getStr();
228 rStream << "-->";
230 break;
231 case XML_NODE_TYPE_DEFAULT:
233 const XMLDefault *pDefault = static_cast<const XMLDefault*>(pCur);
234 rStream << pDefault->GetDefault().getStr();
236 break;
239 return true;
242 void XMLFile::Print( XMLNode *pCur, sal_uInt16 nLevel )
244 if ( !pCur )
245 Print( this );
246 else
248 switch( pCur->GetNodeType())
250 case XML_NODE_TYPE_FILE:
252 if( GetChildList())
253 for ( size_t i = 0; i < GetChildList()->size(); i++ )
254 Print( (*GetChildList())[ i ] );
256 break;
257 case XML_NODE_TYPE_ELEMENT:
259 XMLElement *pElement = static_cast<XMLElement*>(pCur);
261 fprintf( stdout, "<%s", pElement->GetName().getStr());
262 if ( pElement->GetAttributeList())
264 for (size_t j = 0; j < pElement->GetAttributeList()->size(); ++j)
266 const OString aAttrName((*pElement->GetAttributeList())[j]->GetName());
267 if (!aAttrName.equalsIgnoreAsciiCase(XML_LANG))
269 fprintf( stdout, " %s=\"%s\"",
270 aAttrName.getStr(),
271 (*pElement->GetAttributeList())[ j ]->GetValue().getStr());
275 if ( !pElement->GetChildList())
276 fprintf( stdout, "/>" );
277 else
279 fprintf( stdout, ">" );
280 for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ )
281 Print( (*pElement->GetChildList())[ k ], nLevel + 1 );
282 fprintf( stdout, "</%s>", pElement->GetName().getStr());
285 break;
286 case XML_NODE_TYPE_DATA:
288 const XMLData *pData = static_cast<const XMLData*>(pCur);
289 fprintf( stdout, "%s", pData->GetData().getStr());
291 break;
292 case XML_NODE_TYPE_COMMENT:
294 const XMLComment *pComment = static_cast<const XMLComment*>(pCur);
295 fprintf( stdout, "<!--%s-->", pComment->GetComment().getStr());
297 break;
298 case XML_NODE_TYPE_DEFAULT:
300 const XMLDefault *pDefault = static_cast<const XMLDefault*>(pCur);
301 fprintf( stdout, "%s", pDefault->GetDefault().getStr());
303 break;
307 XMLFile::~XMLFile()
309 if( m_pXMLStrings != NULL )
311 XMLHashMap::iterator pos = m_pXMLStrings->begin();
312 for( ; pos != m_pXMLStrings->end() ; ++pos )
314 delete pos->second; // Check and delete content also ?
316 delete m_pXMLStrings;
317 m_pXMLStrings = NULL;
320 XMLFile::XMLFile( const OString &rFileName ) // the file name, empty if created from memory stream
321 : XMLParentNode( NULL )
322 , m_sFileName( rFileName )
323 , m_pXMLStrings( NULL )
325 m_aNodes_localize.insert( TagMap::value_type(OString("bookmark") , sal_True) );
326 m_aNodes_localize.insert( TagMap::value_type(OString("variable") , sal_True) );
327 m_aNodes_localize.insert( TagMap::value_type(OString("paragraph") , sal_True) );
328 m_aNodes_localize.insert( TagMap::value_type(OString("alt") , sal_True) );
329 m_aNodes_localize.insert( TagMap::value_type(OString("caption") , sal_True) );
330 m_aNodes_localize.insert( TagMap::value_type(OString("title") , sal_True) );
331 m_aNodes_localize.insert( TagMap::value_type(OString("link") , sal_True) );
334 void XMLFile::Extract( XMLFile *pCur )
336 if( m_pXMLStrings )
337 delete m_pXMLStrings; // Elements ?
339 m_pXMLStrings = new XMLHashMap();
340 if ( !pCur )
341 SearchL10NElements( this );
342 else
344 if( pCur->GetNodeType() == XML_NODE_TYPE_FILE )
346 SearchL10NElements(pCur);
351 void XMLFile::InsertL10NElement( XMLElement* pElement )
353 OString sId, sLanguage("");
354 LangHashMap* pElem;
356 if( pElement->GetAttributeList() != NULL )
358 for ( size_t j = 0; j < pElement->GetAttributeList()->size(); j++ )
360 const OString sTempStr((*pElement->GetAttributeList())[ j ]->GetName());
361 // Get the "id" Attribute
362 if (sTempStr == ID)
364 sId = (*pElement->GetAttributeList())[ j ]->GetValue();
366 // Get the "xml-lang" Attribute
367 if (sTempStr == XML_LANG)
369 sLanguage = (*pElement->GetAttributeList())[j]->GetValue();
374 else
376 fprintf(stdout,"XMLFile::InsertL10NElement: No AttributeList found");
377 fprintf(stdout,"++++++++++++++++++++++++++++++++++++++++++++++++++");
378 Print( pElement , 0 );
379 fprintf(stdout,"++++++++++++++++++++++++++++++++++++++++++++++++++");
382 XMLHashMap::iterator pos = m_pXMLStrings->find( sId );
383 if( pos == m_pXMLStrings->end() ) // No instanze , create new one
385 pElem = new LangHashMap();
386 (*pElem)[ sLanguage ]=pElement;
387 m_pXMLStrings->insert( XMLHashMap::value_type( sId , pElem ) );
388 m_vOrder.push_back( sId );
390 else // Already there
392 pElem=pos->second;
393 if ( (*pElem)[ sLanguage ] )
395 fprintf(stdout,"Error: Duplicated entry. ID = %s LANG = %s in File %s\n", sId.getStr(), sLanguage.getStr(), m_sFileName.getStr() );
396 exit( -1 );
398 (*pElem)[ sLanguage ]=pElement;
402 XMLFile::XMLFile( const XMLFile& rObj )
403 : XMLParentNode( rObj )
404 , m_sFileName( rObj.m_sFileName )
405 , m_pXMLStrings( 0 )
407 if( this != &rObj )
409 m_aNodes_localize = rObj.m_aNodes_localize;
410 m_vOrder = rObj.m_vOrder;
414 XMLFile& XMLFile::operator=(const XMLFile& rObj)
416 if( this != &rObj )
418 XMLParentNode::operator=(rObj);
420 m_aNodes_localize = rObj.m_aNodes_localize;
421 m_vOrder = rObj.m_vOrder;
423 if( m_pXMLStrings )
424 delete m_pXMLStrings;
426 if( rObj.m_pXMLStrings )
428 m_pXMLStrings = new XMLHashMap();
429 for( XMLHashMap::iterator pos = rObj.m_pXMLStrings->begin() ; pos != rObj.m_pXMLStrings->end() ; ++pos )
431 LangHashMap* pElem=pos->second;
432 LangHashMap* pNewelem = new LangHashMap();
433 for(LangHashMap::iterator pos2=pElem->begin(); pos2!=pElem->end();++pos2)
435 (*pNewelem)[ pos2->first ] = new XMLElement( *pos2->second );
437 (*m_pXMLStrings)[ pos->first ] = pNewelem;
441 return *this;
444 void XMLFile::SearchL10NElements( XMLChildNode *pCur, int nPos )
446 bool bInsert = true;
447 if ( !pCur )
448 SearchL10NElements( this );
449 else
451 switch( pCur->GetNodeType())
453 case XML_NODE_TYPE_FILE:
455 XMLChildNode* pElement;
456 if( GetChildList())
458 for ( size_t i = 0; i < GetChildList()->size(); i++ )
460 pElement = (*GetChildList())[ i ];
461 if( pElement->GetNodeType() == XML_NODE_TYPE_ELEMENT )
462 SearchL10NElements( pElement , i);
466 break;
467 case XML_NODE_TYPE_ELEMENT:
469 XMLElement *pElement = static_cast<XMLElement*>(pCur);
470 const OString sName(pElement->GetName().toAsciiLowerCase());
471 OString sLanguage, sTmpStrVal, sOldref;
472 if ( pElement->GetAttributeList())
474 for ( size_t j = 0 , cnt = pElement->GetAttributeList()->size(); j < cnt && bInsert; ++j )
476 const OString sTmpStr = (*pElement->GetAttributeList())[j]->GetName();
477 if (sTmpStr == ID)
479 sTmpStrVal=(*pElement->GetAttributeList())[ j ]->GetValue();
481 if (sTmpStr == "localize")
483 bInsert=false;
485 if (sTmpStr == XML_LANG) // Get the "xml-lang" Attribute
487 sLanguage=(*pElement->GetAttributeList())[ j ]->GetValue();
489 if (sTmpStr == OLDREF) // Get the "oldref" Attribute
491 sOldref=(*pElement->GetAttributeList())[ j ]->GetValue();
494 pElement->SetLanguageId( sLanguage );
495 pElement->SetId( sTmpStrVal );
496 pElement->SetOldRef( sOldref );
497 pElement->SetPos( nPos );
500 if ( bInsert && ( m_aNodes_localize.find( sName ) != m_aNodes_localize.end() ) )
501 InsertL10NElement(pElement);
502 else if ( bInsert && pElement->GetChildList() )
504 for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ )
505 SearchL10NElements( (*pElement->GetChildList())[ k ], k);
508 break;
509 case XML_NODE_TYPE_DATA:
510 break;
511 case XML_NODE_TYPE_COMMENT:
512 break;
513 case XML_NODE_TYPE_DEFAULT:
514 break;
519 bool XMLFile::CheckExportStatus( XMLParentNode *pCur )
521 static bool bStatusExport = true;
523 bool bInsert = true;
524 if ( !pCur )
525 CheckExportStatus( this );
526 else {
527 switch( pCur->GetNodeType())
529 case XML_NODE_TYPE_FILE:
531 if( GetChildList())
533 for ( size_t i = 0; i < GetChildList()->size(); i++ )
535 XMLParentNode* pElement = static_cast<XMLParentNode*>((*GetChildList())[ i ]);
536 if( pElement->GetNodeType() == XML_NODE_TYPE_ELEMENT ) CheckExportStatus( pElement );//, i);
540 break;
541 case XML_NODE_TYPE_ELEMENT:
543 XMLElement *pElement = static_cast<XMLElement*>(pCur);
544 if (pElement->GetName().equalsIgnoreAsciiCase("TOPIC"))
546 if ( pElement->GetAttributeList())
548 for (size_t j = 0 , cnt = pElement->GetAttributeList()->size(); j < cnt && bInsert; ++j)
550 const OString tmpStr((*pElement->GetAttributeList())[j]->GetName());
551 if (tmpStr.equalsIgnoreAsciiCase("STATUS"))
553 const OString tmpStrVal((*pElement->GetAttributeList())[j]->GetValue());
554 if (!tmpStrVal.equalsIgnoreAsciiCase("PUBLISH") &&
555 !tmpStrVal.equalsIgnoreAsciiCase("DEPRECATED"))
557 bStatusExport = false;
564 else if ( pElement->GetChildList() )
566 for (size_t k = 0; k < pElement->GetChildList()->size(); ++k)
567 CheckExportStatus( static_cast<XMLParentNode*>((*pElement->GetChildList())[k]) );
570 break;
573 return bStatusExport;
576 XMLElement::XMLElement(
577 const OString &rName, // the element name
578 XMLParentNode *pParent // parent node of this element
580 : XMLParentNode( pParent )
581 , m_sElementName( rName )
582 , m_pAttributes( NULL )
583 , m_sProject(OString())
584 , m_sFilename(OString())
585 , m_sId(OString())
586 , m_sOldRef(OString())
587 , m_sResourceType(OString())
588 , m_sLanguageId(OString())
589 , m_nPos(0)
593 XMLElement::XMLElement(const XMLElement& rObj)
594 : XMLParentNode( rObj )
595 , m_sElementName( rObj.m_sElementName )
596 , m_pAttributes( 0 )
597 , m_sProject( rObj.m_sProject )
598 , m_sFilename( rObj.m_sFilename )
599 , m_sId( rObj.m_sId )
600 , m_sOldRef( rObj.m_sOldRef )
601 , m_sResourceType( rObj.m_sResourceType )
602 , m_sLanguageId( rObj.m_sLanguageId )
603 , m_nPos( rObj.m_nPos )
605 if ( rObj.m_pAttributes )
607 m_pAttributes = new XMLAttributeList();
608 for ( size_t i = 0; i < rObj.m_pAttributes->size(); i++ )
609 AddAttribute( (*rObj.m_pAttributes)[ i ]->GetName(), (*rObj.m_pAttributes)[ i ]->GetValue() );
613 XMLElement& XMLElement::operator=(const XMLElement& rObj)
615 if( this !=& rObj )
617 XMLParentNode::operator=(rObj);
618 m_sElementName = rObj.m_sElementName;
619 m_sProject = rObj.m_sProject;
620 m_sFilename = rObj.m_sFilename;
621 m_sId = rObj.m_sId;
622 m_sOldRef = rObj.m_sOldRef;
623 m_sResourceType = rObj.m_sResourceType;
624 m_sLanguageId = rObj.m_sLanguageId;
625 m_nPos = rObj.m_nPos;
627 if ( m_pAttributes )
629 for ( size_t i = 0; i < m_pAttributes->size(); i++ )
630 delete (*m_pAttributes)[ i ];
631 delete m_pAttributes;
633 if ( rObj.m_pAttributes )
635 m_pAttributes = new XMLAttributeList();
636 for ( size_t i = 0; i < rObj.m_pAttributes->size(); i++ )
637 AddAttribute( (*rObj.m_pAttributes)[ i ]->GetName(), (*rObj.m_pAttributes)[ i ]->GetValue() );
640 return *this;
643 void XMLElement::AddAttribute( const OString &rAttribute, const OString &rValue )
645 if ( !m_pAttributes )
646 m_pAttributes = new XMLAttributeList();
647 m_pAttributes->push_back( new XMLAttribute( rAttribute, rValue ) );
650 void XMLElement::ChangeLanguageTag( const OString &rValue )
652 SetLanguageId(rValue);
653 if ( m_pAttributes )
655 for (size_t i = 0; i < m_pAttributes->size(); ++i)
657 if ( (*m_pAttributes)[ i ]->GetName() == "xml-lang" )
658 (*m_pAttributes)[ i ]->setValue(rValue);
661 XMLChildNodeList* pCList = GetChildList();
663 if( pCList )
665 for ( size_t i = 0; i < pCList->size(); i++ )
667 XMLChildNode* pNode = (*pCList)[ i ];
668 if( pNode && pNode->GetNodeType() == XML_NODE_TYPE_ELEMENT )
670 XMLElement* pElem = static_cast< XMLElement* >(pNode);
671 pElem->ChangeLanguageTag( rValue );
672 pElem->SetLanguageId(rValue);
673 pElem = NULL;
674 pNode = NULL;
677 pCList = NULL;
681 XMLElement::~XMLElement()
683 if ( m_pAttributes )
685 for ( size_t i = 0; i < m_pAttributes->size(); i++ )
686 delete (*m_pAttributes)[ i ];
688 delete m_pAttributes;
689 m_pAttributes = NULL;
693 OString XMLElement::ToOString()
695 OStringBuffer sBuffer;
696 Print(this,sBuffer,true);
697 return sBuffer.makeStringAndClear();
700 void XMLElement::Print(XMLNode *pCur, OStringBuffer& rBuffer, bool bRootelement ) const
702 if( pCur )
704 if( bRootelement )
706 XMLElement *pElement = static_cast<XMLElement*>(pCur);
707 if ( pElement->GetAttributeList())
709 if ( pElement->GetChildList())
711 for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ )
713 XMLChildNode* pTmp = (*pElement->GetChildList())[ k ];
714 Print( pTmp, rBuffer , false);
719 else
721 switch( pCur->GetNodeType())
723 case XML_NODE_TYPE_ELEMENT:
725 XMLElement *pElement = static_cast<XMLElement*>(pCur);
727 if( !pElement->GetName().equalsIgnoreAsciiCase("comment") )
729 rBuffer.append( "<" );
730 rBuffer.append( pElement->GetName() );
731 if ( pElement->GetAttributeList())
733 for ( size_t j = 0; j < pElement->GetAttributeList()->size(); j++ )
735 const OString aAttrName( (*pElement->GetAttributeList())[ j ]->GetName() );
736 if( !aAttrName.equalsIgnoreAsciiCase( "xml-lang" ) )
738 rBuffer.append(
739 " " + aAttrName + "=\"" +
740 (*pElement->GetAttributeList())[ j ]->GetValue() + "\"" );
744 if ( !pElement->GetChildList())
745 rBuffer.append( "/>" );
746 else
748 rBuffer.append( ">" );
749 for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ )
751 XMLChildNode* pTmp = (*pElement->GetChildList())[ k ];
752 Print( pTmp, rBuffer , false);
754 rBuffer.append( "</" + pElement->GetName() + ">" );
758 break;
759 case XML_NODE_TYPE_DATA:
761 const XMLData *pData = static_cast<const XMLData*>(pCur);
762 rBuffer.append( pData->GetData() );
764 break;
765 case XML_NODE_TYPE_COMMENT:
767 const XMLComment *pComment = static_cast<const XMLComment*>(pCur);
768 rBuffer.append( "<!--" + pComment->GetComment() + "-->" );
770 break;
771 case XML_NODE_TYPE_DEFAULT:
773 const XMLDefault *pDefault = static_cast<const XMLDefault*>(pCur);
774 rBuffer.append( pDefault->GetDefault() );
776 break;
780 else
782 fprintf(stdout,"\n#+------Error: NULL Pointer in XMLELement::Print------+#\n");
783 return;
788 // class SimpleXMLParser
791 namespace
794 static OUString lcl_pathnameToAbsoluteUrl(const OString& rPathname)
796 OUString sPath = OStringToOUString(rPathname, RTL_TEXTENCODING_UTF8 );
797 OUString sUrl;
798 if (osl::FileBase::getFileURLFromSystemPath(sPath, sUrl)
799 != osl::FileBase::E_None)
801 std::cerr << "Error: Cannot convert input pathname to URL\n";
802 std::exit(EXIT_FAILURE);
804 OUString sCwd;
805 if (osl_getProcessWorkingDir(&sCwd.pData) != osl_Process_E_None)
807 std::cerr << "Error: Cannot determine cwd\n";
808 std::exit(EXIT_FAILURE);
810 if (osl::FileBase::getAbsoluteFileURL(sCwd, sUrl, sUrl)
811 != osl::FileBase::E_None)
813 std::cerr << "Error: Cannot convert input URL to absolute URL\n";
814 std::exit(EXIT_FAILURE);
816 return sUrl;
821 SimpleXMLParser::SimpleXMLParser()
822 : m_pXMLFile(NULL)
823 , m_pCurNode(NULL)
824 , m_pCurData(NULL)
826 m_aParser = XML_ParserCreate( NULL );
827 XML_SetUserData( m_aParser, this );
828 XML_SetElementHandler( m_aParser, (XML_StartElementHandler) StartElementHandler, (XML_EndElementHandler) EndElementHandler );
829 XML_SetCharacterDataHandler( m_aParser, (XML_CharacterDataHandler) CharacterDataHandler );
830 XML_SetCommentHandler( m_aParser, (XML_CommentHandler) CommentHandler );
831 XML_SetDefaultHandler( m_aParser, (XML_DefaultHandler) DefaultHandler );
834 SimpleXMLParser::~SimpleXMLParser()
836 XML_ParserFree( m_aParser );
839 void SimpleXMLParser::StartElementHandler(
840 void *userData, const XML_Char *name, const XML_Char **atts )
842 static_cast<SimpleXMLParser *>(userData)->StartElement( name, atts );
845 void SimpleXMLParser::EndElementHandler(
846 void *userData, const XML_Char *name )
848 static_cast<SimpleXMLParser *>(userData)->EndElement( name );
851 void SimpleXMLParser::CharacterDataHandler(
852 void *userData, const XML_Char *s, int len )
854 static_cast<SimpleXMLParser *>(userData)->CharacterData( s, len );
857 void SimpleXMLParser::CommentHandler(
858 void *userData, const XML_Char *data )
860 static_cast<SimpleXMLParser *>(userData)->Comment( data );
863 void SimpleXMLParser::DefaultHandler(
864 void *userData, const XML_Char *s, int len )
866 static_cast<SimpleXMLParser *>(userData)->Default( s, len );
869 void SimpleXMLParser::StartElement(
870 const XML_Char *name, const XML_Char **atts )
872 XMLElement *pElement = new XMLElement( OString(name), m_pCurNode );
873 m_pCurNode = pElement;
874 m_pCurData = NULL;
876 int i = 0;
877 while( atts[i] )
879 pElement->AddAttribute( atts[ i ], atts[ i + 1 ] );
880 i += 2;
884 void SimpleXMLParser::EndElement( const XML_Char * /*name*/ )
886 m_pCurNode = m_pCurNode->GetParent();
887 m_pCurData = NULL;
890 void SimpleXMLParser::CharacterData( const XML_Char *s, int len )
892 if ( !m_pCurData )
894 OString x( s, len );
895 m_pCurData = new XMLData( helper::UnQuotHTML(x) , m_pCurNode );
897 else
899 OString x( s, len );
900 m_pCurData->AddData( helper::UnQuotHTML(x) );
905 void SimpleXMLParser::Comment( const XML_Char *data )
907 m_pCurData = NULL;
908 new XMLComment( OString( data ), m_pCurNode );
911 void SimpleXMLParser::Default( const XML_Char *s, int len )
913 m_pCurData = NULL;
914 new XMLDefault(OString( s, len ), m_pCurNode );
917 XMLFile *SimpleXMLParser::Execute( const OString &rFileName, XMLFile* pXMLFileIn )
919 m_aErrorInformation.m_eCode = XML_ERROR_NONE;
920 m_aErrorInformation.m_nLine = 0;
921 m_aErrorInformation.m_nColumn = 0;
922 m_aErrorInformation.m_sMessage = "ERROR: Unable to open file ";
923 m_aErrorInformation.m_sMessage += rFileName;
925 OUString aFileURL(lcl_pathnameToAbsoluteUrl(rFileName));
927 oslFileHandle h;
928 if (osl_openFile(aFileURL.pData, &h, osl_File_OpenFlag_Read)
929 != osl_File_E_None)
931 return 0;
934 sal_uInt64 s;
935 oslFileError e = osl_getFileSize(h, &s);
936 void * p = NULL;
937 if (e == osl_File_E_None)
939 e = osl_mapFile(h, &p, s, 0, 0);
941 if (e != osl_File_E_None)
943 osl_closeFile(h);
944 return 0;
947 m_pXMLFile = pXMLFileIn;
948 m_pXMLFile->SetName( rFileName );
950 m_pCurNode = m_pXMLFile;
951 m_pCurData = NULL;
953 m_aErrorInformation.m_eCode = XML_ERROR_NONE;
954 m_aErrorInformation.m_nLine = 0;
955 m_aErrorInformation.m_nColumn = 0;
956 if ( !m_pXMLFile->GetName().isEmpty())
958 m_aErrorInformation.m_sMessage = "File " + m_pXMLFile->GetName() + " parsed successfully";
960 else
961 m_aErrorInformation.m_sMessage = "XML-File parsed successfully";
963 if (!XML_Parse(m_aParser, static_cast< char * >(p), s, true))
965 m_aErrorInformation.m_eCode = XML_GetErrorCode( m_aParser );
966 m_aErrorInformation.m_nLine = XML_GetErrorLineNumber( m_aParser );
967 m_aErrorInformation.m_nColumn = XML_GetErrorColumnNumber( m_aParser );
969 m_aErrorInformation.m_sMessage = "ERROR: ";
970 if ( !m_pXMLFile->GetName().isEmpty())
971 m_aErrorInformation.m_sMessage += m_pXMLFile->GetName();
972 else
973 m_aErrorInformation.m_sMessage += OString( "XML-File (");
975 m_aErrorInformation.m_sMessage +=
976 OString::number(sal::static_int_cast< sal_Int64 >(m_aErrorInformation.m_nLine)) + "," +
977 OString::number(sal::static_int_cast< sal_Int64 >(m_aErrorInformation.m_nColumn)) + "): ";
979 switch (m_aErrorInformation.m_eCode)
981 case XML_ERROR_NO_MEMORY:
982 m_aErrorInformation.m_sMessage += "No memory";
983 break;
984 case XML_ERROR_SYNTAX:
985 m_aErrorInformation.m_sMessage += "Syntax";
986 break;
987 case XML_ERROR_NO_ELEMENTS:
988 m_aErrorInformation.m_sMessage += "No elements";
989 break;
990 case XML_ERROR_INVALID_TOKEN:
991 m_aErrorInformation.m_sMessage += "Invalid token";
992 break;
993 case XML_ERROR_UNCLOSED_TOKEN:
994 m_aErrorInformation.m_sMessage += "Unclosed token";
995 break;
996 case XML_ERROR_PARTIAL_CHAR:
997 m_aErrorInformation.m_sMessage += "Partial char";
998 break;
999 case XML_ERROR_TAG_MISMATCH:
1000 m_aErrorInformation.m_sMessage += "Tag mismatch";
1001 break;
1002 case XML_ERROR_DUPLICATE_ATTRIBUTE:
1003 m_aErrorInformation.m_sMessage += "Dublicat attribute";
1004 break;
1005 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
1006 m_aErrorInformation.m_sMessage += "Junk after doc element";
1007 break;
1008 case XML_ERROR_PARAM_ENTITY_REF:
1009 m_aErrorInformation.m_sMessage += "Param entity ref";
1010 break;
1011 case XML_ERROR_UNDEFINED_ENTITY:
1012 m_aErrorInformation.m_sMessage += "Undefined entity";
1013 break;
1014 case XML_ERROR_RECURSIVE_ENTITY_REF:
1015 m_aErrorInformation.m_sMessage += "Recursive entity ref";
1016 break;
1017 case XML_ERROR_ASYNC_ENTITY:
1018 m_aErrorInformation.m_sMessage += "Async_entity";
1019 break;
1020 case XML_ERROR_BAD_CHAR_REF:
1021 m_aErrorInformation.m_sMessage += "Bad char ref";
1022 break;
1023 case XML_ERROR_BINARY_ENTITY_REF:
1024 m_aErrorInformation.m_sMessage += "Binary entity";
1025 break;
1026 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
1027 m_aErrorInformation.m_sMessage += "Attribute external entity ref";
1028 break;
1029 case XML_ERROR_MISPLACED_XML_PI:
1030 m_aErrorInformation.m_sMessage += "Misplaced xml pi";
1031 break;
1032 case XML_ERROR_UNKNOWN_ENCODING:
1033 m_aErrorInformation.m_sMessage += "Unknown encoding";
1034 break;
1035 case XML_ERROR_INCORRECT_ENCODING:
1036 m_aErrorInformation.m_sMessage += "Incorrect encoding";
1037 break;
1038 case XML_ERROR_UNCLOSED_CDATA_SECTION:
1039 m_aErrorInformation.m_sMessage += "Unclosed cdata section";
1040 break;
1041 case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
1042 m_aErrorInformation.m_sMessage += "External entity handling";
1043 break;
1044 case XML_ERROR_NOT_STANDALONE:
1045 m_aErrorInformation.m_sMessage += "Not standalone";
1046 break;
1047 case XML_ERROR_NONE:
1048 break;
1049 default:
1050 break;
1052 delete m_pXMLFile;
1053 m_pXMLFile = NULL;
1056 osl_unmapMappedFile(h, p, s);
1057 osl_closeFile(h);
1059 return m_pXMLFile;
1062 namespace
1065 static icu::UnicodeString lcl_QuotRange(
1066 const icu::UnicodeString& rString, const sal_Int32 nStart,
1067 const sal_Int32 nEnd, bool bInsideTag = false )
1069 icu::UnicodeString sReturn;
1070 assert( nStart < nEnd );
1071 assert( nStart >= 0 );
1072 assert( nEnd <= rString.length() );
1073 for (sal_Int32 i = nStart; i < nEnd; ++i)
1075 switch (rString[i])
1077 case '<':
1078 sReturn.append("&lt;");
1079 break;
1080 case '>':
1081 sReturn.append("&gt;");
1082 break;
1083 case '"':
1084 if( !bInsideTag )
1085 sReturn.append("&quot;");
1086 else
1087 sReturn.append(rString[i]);
1088 break;
1089 case '&':
1090 if (rString.startsWith("&amp;", i, 5))
1091 sReturn.append('&');
1092 else
1093 sReturn.append("&amp;");
1094 break;
1095 default:
1096 sReturn.append(rString[i]);
1097 break;
1100 return sReturn;
1103 static bool lcl_isTag( const icu::UnicodeString& rString )
1105 static const int nSize = 13;
1106 static const icu::UnicodeString vTags[nSize] = {
1107 "ahelp", "link", "item", "emph", "defaultinline",
1108 "switchinline", "caseinline", "variable",
1109 "bookmark_value", "image", "embedvar", "alt", "sup" };
1111 for( int nIndex = 0; nIndex < nSize; ++nIndex )
1113 if( rString.startsWith("<" + vTags[nIndex]) ||
1114 rString == "</" + vTags[nIndex] + ">" )
1115 return true;
1118 return rString == "<br/>" || rString =="<help-id-missing/>";
1121 } /// anonymous namespace
1123 OString XMLUtil::QuotHTML( const OString &rString )
1125 if( rString.trim().isEmpty() )
1126 return rString;
1127 UErrorCode nIcuErr = U_ZERO_ERROR;
1128 static const sal_uInt32 nSearchFlags =
1129 UREGEX_DOTALL | UREGEX_CASE_INSENSITIVE;
1130 static const OUString sPattern(
1131 "<[/]\?\?[a-z_-]+?(?:| +[a-z]+?=\".*?\") *[/]\?\?>");
1132 static const UnicodeString sSearchPat(
1133 reinterpret_cast<const UChar*>(sPattern.getStr()),
1134 sPattern.getLength() );
1136 const OUString sOUSource = OStringToOUString(rString, RTL_TEXTENCODING_UTF8);
1137 icu::UnicodeString sSource(
1138 reinterpret_cast<const UChar*>(
1139 sOUSource.getStr()), sOUSource.getLength() );
1141 RegexMatcher aRegexMatcher( sSearchPat, nSearchFlags, nIcuErr );
1142 aRegexMatcher.reset( sSource );
1144 icu::UnicodeString sReturn;
1145 int32_t nEndPos = 0;
1146 int32_t nStartPos = 0;
1147 while( aRegexMatcher.find(nStartPos, nIcuErr) && U_SUCCESS(nIcuErr) )
1149 nStartPos = aRegexMatcher.start(nIcuErr);
1150 if ( nEndPos < nStartPos )
1151 sReturn.append(lcl_QuotRange(sSource, nEndPos, nStartPos));
1152 nEndPos = aRegexMatcher.end(nIcuErr);
1153 icu::UnicodeString sMatch = aRegexMatcher.group(nIcuErr);
1154 if( lcl_isTag(sMatch) )
1156 sReturn.append("<");
1157 sReturn.append(lcl_QuotRange(sSource, nStartPos+1, nEndPos-1, true));
1158 sReturn.append(">");
1160 else
1161 sReturn.append(lcl_QuotRange(sSource, nStartPos, nEndPos));
1162 ++nStartPos;
1164 if( nEndPos < sSource.length() )
1165 sReturn.append(lcl_QuotRange(sSource, nEndPos, sSource.length()));
1166 sReturn.append('\0');
1167 return
1168 OUStringToOString(
1169 OUString(reinterpret_cast<const sal_Unicode*>(sReturn.getBuffer())),
1170 RTL_TEXTENCODING_UTF8);
1173 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */