Updated core
[LibreOffice.git] / l10ntools / source / xmlparse.cxx
blob2959bba4a202242a761ef4a2a9dfaae1df0a1216
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
19 #include "sal/config.h"
21 #include <iterator> /* std::iterator*/
23 #include <cassert>
24 #include <stdio.h>
25 #include <sal/alloca.h>
27 #include "helper.hxx"
28 #include "common.hxx"
29 #include "xmlparse.hxx"
30 #include <fstream>
31 #include <iostream>
32 #include <osl/mutex.hxx>
33 #include <osl/thread.hxx>
34 #include <osl/process.h>
35 #include <rtl/strbuf.hxx>
36 #include <unicode/regex.h>
38 using namespace U_ICU_NAMESPACE;
39 using namespace std;
40 using namespace osl;
43 // class XMLChildNode
46 /*****************************************************************************/
47 XMLChildNode::XMLChildNode( XMLParentNode *pPar )
48 /*****************************************************************************/
49 : pParent( pPar )
51 if ( pParent )
52 pParent->AddChild( this );
56 /*****************************************************************************/
57 XMLChildNode::XMLChildNode( const XMLChildNode& obj)
58 /*****************************************************************************/
59 : XMLNode(obj),
60 pParent(obj.pParent){}
62 /*****************************************************************************/
63 XMLChildNode& XMLChildNode::operator=(const XMLChildNode& obj){
64 /*****************************************************************************/
65 if(this != &obj){
66 pParent=obj.pParent;
68 return *this;
71 // class XMLParentNode
75 /*****************************************************************************/
76 XMLParentNode::~XMLParentNode()
77 /*****************************************************************************/
79 if( pChildList ){
80 RemoveAndDeleteAllChildren();
81 delete pChildList;
82 pChildList = NULL;
84 pChildList = NULL;
86 /*****************************************************************************/
87 XMLParentNode::XMLParentNode( const XMLParentNode& obj)
88 /*****************************************************************************/
89 : XMLChildNode( obj )
91 if( obj.pChildList ){
92 pChildList=new XMLChildNodeList();
93 XMLChildNode* pNode = NULL;
94 for ( size_t i = 0; i < obj.pChildList->size(); i++ ){
95 pNode = (*obj.pChildList)[ i ];
96 if( pNode != NULL){
97 switch(pNode->GetNodeType()){
98 case XML_NODE_TYPE_ELEMENT:
99 AddChild( new XMLElement( *static_cast<XMLElement* >(pNode) ) ); break;
100 case XML_NODE_TYPE_DATA:
101 AddChild( new XMLData ( *static_cast<XMLData* > (pNode) ) ); break;
102 case XML_NODE_TYPE_COMMENT:
103 AddChild( new XMLComment( *static_cast<XMLComment* >(pNode) ) ); break;
104 case XML_NODE_TYPE_DEFAULT:
105 AddChild( new XMLDefault( *static_cast<XMLDefault* >(pNode) ) ); break;
106 default: fprintf(stdout,"XMLParentNode::XMLParentNode( const XMLParentNode& obj) strange obj");
110 }else pChildList = NULL;
112 /*****************************************************************************/
113 XMLParentNode& XMLParentNode::operator=(const XMLParentNode& obj){
114 /*****************************************************************************/
115 if(this!=&obj){
116 XMLChildNode::operator=(obj);
117 if( pChildList ){
118 RemoveAndDeleteAllChildren();
119 delete pChildList;
120 pChildList = NULL;
122 if( obj.pChildList ){
123 pChildList=new XMLChildNodeList();
124 for ( size_t i = 0; i < obj.pChildList->size(); i++ )
125 AddChild( (*obj.pChildList)[ i ] );
126 }else pChildList = NULL;
129 return *this;
131 /*****************************************************************************/
132 void XMLParentNode::AddChild( XMLChildNode *pChild )
133 /*****************************************************************************/
135 if ( !pChildList )
136 pChildList = new XMLChildNodeList();
137 pChildList->push_back( pChild );
140 /*****************************************************************************/
141 void XMLParentNode::RemoveAndDeleteAllChildren(){
142 /*****************************************************************************/
143 if ( pChildList ) {
144 for ( size_t i = 0; i < pChildList->size(); i++ )
145 delete (*pChildList)[ i ];
146 pChildList->clear();
151 // class XMLFile
154 /*****************************************************************************/
155 sal_uInt16 XMLFile::GetNodeType()
156 /*****************************************************************************/
158 return XML_NODE_TYPE_FILE;
161 void XMLFile::Write( OString const &aFilename )
163 std::ofstream s(
164 aFilename.getStr(), std::ios_base::out | std::ios_base::trunc);
165 if (!s.is_open()) {
166 std::cerr
167 << "Error: helpex cannot create file " << aFilename.getStr()
168 << '\n';
169 std::exit(EXIT_FAILURE);
171 Write(s);
172 s.close();
175 void XMLFile::WriteString( ofstream &rStream, const OUString &sString )
177 OString sText(OUStringToOString(sString, RTL_TEXTENCODING_UTF8));
178 rStream << sText.getStr();
181 sal_Bool XMLFile::Write( ofstream &rStream , XMLNode *pCur )
183 if ( !pCur )
184 Write( rStream, this );
185 else {
186 switch( pCur->GetNodeType()) {
187 case XML_NODE_TYPE_FILE: {
188 if( GetChildList())
189 for ( size_t i = 0; i < GetChildList()->size(); i++ )
190 Write( rStream, (*GetChildList())[ i ] );
192 break;
193 case XML_NODE_TYPE_ELEMENT: {
194 XMLElement *pElement = ( XMLElement * ) pCur;
195 rStream << "<";
196 WriteString( rStream, pElement->GetName());
197 if ( pElement->GetAttributeList())
198 for ( size_t j = 0; j < pElement->GetAttributeList()->size(); j++ ) {
199 rStream << " ";
200 OUString sData( (*pElement->GetAttributeList())[ j ]->GetName() );
201 WriteString( rStream , XMLUtil::QuotHTML( sData ) );
202 rStream << "=\"";
203 sData = (*pElement->GetAttributeList())[ j ]->GetValue();
204 WriteString( rStream , XMLUtil::QuotHTML( sData ) );
205 rStream << "\"";
207 if ( !pElement->GetChildList())
208 rStream << "/>";
209 else {
210 rStream << ">";
211 for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ )
212 Write( rStream, (*pElement->GetChildList())[ k ] );
213 rStream << "</";
214 WriteString( rStream, pElement->GetName());
215 rStream << ">";
218 break;
219 case XML_NODE_TYPE_DATA: {
220 XMLData *pData = ( XMLData * ) pCur;
221 OUString sData( pData->GetData());
222 WriteString( rStream, XMLUtil::QuotHTML( sData ) );
224 break;
225 case XML_NODE_TYPE_COMMENT: {
226 XMLComment *pComment = ( XMLComment * ) pCur;
227 rStream << "<!--";
228 WriteString( rStream, pComment->GetComment());
229 rStream << "-->";
231 break;
232 case XML_NODE_TYPE_DEFAULT: {
233 XMLDefault *pDefault = ( XMLDefault * ) pCur;
234 WriteString( rStream, pDefault->GetDefault());
236 break;
239 return sal_True;
243 void XMLFile::Print( XMLNode *pCur, sal_uInt16 nLevel )
246 if ( !pCur )
247 Print( this );
248 else {
249 switch( pCur->GetNodeType()) {
250 case XML_NODE_TYPE_FILE: {
251 if( GetChildList())
252 for ( size_t i = 0; i < GetChildList()->size(); i++ )
253 Print( (*GetChildList())[ i ] );
255 break;
256 case XML_NODE_TYPE_ELEMENT: {
257 XMLElement *pElement = ( XMLElement * ) pCur;
259 fprintf( stdout, "<%s", OUStringToOString(pElement->GetName(), RTL_TEXTENCODING_UTF8).getStr());
260 if ( pElement->GetAttributeList())
262 for (size_t j = 0; j < pElement->GetAttributeList()->size(); ++j)
264 OString aAttrName(OUStringToOString((*pElement->GetAttributeList())[j]->GetName(),
265 RTL_TEXTENCODING_UTF8));
266 if (!aAttrName.equalsIgnoreAsciiCase(XML_LANG))
268 fprintf( stdout, " %s=\"%s\"",
269 aAttrName.getStr(),
270 OUStringToOString( (*pElement->GetAttributeList())[ j ]->GetValue(),
271 RTL_TEXTENCODING_UTF8).getStr());
275 if ( !pElement->GetChildList())
276 fprintf( stdout, "/>" );
277 else {
278 fprintf( stdout, ">" );
279 for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ )
280 Print( (*pElement->GetChildList())[ k ], nLevel + 1 );
281 fprintf( stdout, "</%s>", OUStringToOString(pElement->GetName(), RTL_TEXTENCODING_UTF8).getStr());
284 break;
285 case XML_NODE_TYPE_DATA: {
286 XMLData *pData = ( XMLData * ) pCur;
287 OUString sData = pData->GetData();
288 fprintf( stdout, "%s", OUStringToOString(sData, RTL_TEXTENCODING_UTF8).getStr());
290 break;
291 case XML_NODE_TYPE_COMMENT: {
292 XMLComment *pComment = ( XMLComment * ) pCur;
293 fprintf( stdout, "<!--%s-->", OUStringToOString(pComment->GetComment(), RTL_TEXTENCODING_UTF8).getStr());
295 break;
296 case XML_NODE_TYPE_DEFAULT: {
297 XMLDefault *pDefault = ( XMLDefault * ) pCur;
298 fprintf( stdout, "%s", OUStringToOString(pDefault->GetDefault(), RTL_TEXTENCODING_UTF8).getStr());
300 break;
304 XMLFile::~XMLFile()
306 if( XMLStrings != NULL ){
307 XMLHashMap::iterator pos = XMLStrings->begin();
308 for( ; pos != XMLStrings->end() ; ++pos ){
309 delete pos->second; // Check and delete content also ?
311 delete XMLStrings;
312 XMLStrings = NULL;
315 /*****************************************************************************/
316 XMLFile::XMLFile( const OUString &rFileName ) // the file name, empty if created from memory stream
317 /*****************************************************************************/
318 : XMLParentNode( NULL ),
319 sFileName ( rFileName ),
320 ID ( "id" ),
321 OLDREF ( "oldref" ),
322 XML_LANG ( "xml-lang" ),
323 XMLStrings ( NULL )
326 nodes_localize.insert( TagMap::value_type(OString(RTL_CONSTASCII_STRINGPARAM("bookmark")) , sal_True) );
327 nodes_localize.insert( TagMap::value_type(OString(RTL_CONSTASCII_STRINGPARAM("variable")) , sal_True) );
328 nodes_localize.insert( TagMap::value_type(OString(RTL_CONSTASCII_STRINGPARAM("paragraph")) , sal_True) );
329 nodes_localize.insert( TagMap::value_type(OString(RTL_CONSTASCII_STRINGPARAM("alt")) , sal_True) );
330 nodes_localize.insert( TagMap::value_type(OString(RTL_CONSTASCII_STRINGPARAM("caption")) , sal_True) );
331 nodes_localize.insert( TagMap::value_type(OString(RTL_CONSTASCII_STRINGPARAM("title")) , sal_True) );
332 nodes_localize.insert( TagMap::value_type(OString(RTL_CONSTASCII_STRINGPARAM("link")) , sal_True) );
334 /*****************************************************************************/
335 void XMLFile::Extract( XMLFile *pCur )
336 /*****************************************************************************/
338 if( XMLStrings != NULL ) delete XMLStrings; // Elements ?
340 XMLStrings = new XMLHashMap();
341 if ( !pCur )
342 SearchL10NElements( this );
343 else {
344 if( pCur->GetNodeType()==XML_NODE_TYPE_FILE) {
345 SearchL10NElements(pCur);
350 /*****************************************************************************/
351 void XMLFile::InsertL10NElement( XMLElement* pElement ){
352 /*****************************************************************************/
353 OString tmpStr,id,language("");
354 LangHashMap* elem;
356 if( pElement->GetAttributeList() != NULL ){
357 for ( size_t j = 0; j < pElement->GetAttributeList()->size(); j++ )
359 tmpStr=OUStringToOString((*pElement->GetAttributeList())[ j ]->GetName(), RTL_TEXTENCODING_UTF8);
360 if (tmpStr == ID) { // Get the "id" Attribute
361 id = OUStringToOString((*pElement->GetAttributeList())[ j ]->GetValue(),RTL_TEXTENCODING_UTF8);
363 if (tmpStr == XML_LANG) { // Get the "xml-lang" Attribute
364 language = OUStringToOString((*pElement->GetAttributeList())[j]->GetValue(),RTL_TEXTENCODING_UTF8);
368 }else{
369 fprintf(stdout,"XMLFile::InsertL10NElement: No AttributeList found");
370 fprintf(stdout,"++++++++++++++++++++++++++++++++++++++++++++++++++");
371 Print( pElement , 0 );
372 fprintf(stdout,"++++++++++++++++++++++++++++++++++++++++++++++++++");
375 XMLHashMap::iterator pos = XMLStrings->find( id );
376 if( pos == XMLStrings->end() ){ // No instanze , create new one
377 elem = new LangHashMap();
378 (*elem)[ language ]=pElement;
379 XMLStrings->insert( XMLHashMap::value_type( id , elem ) );
380 order.push_back( id );
381 }else{ // Already there
382 elem=pos->second;
383 if ( (*elem)[ language ] )
385 fprintf(stdout,"Error: Duplicated entry. ID = %s LANG = %s in File %s\n", id.getStr(), language.getStr(), OUStringToOString(sFileName, RTL_TEXTENCODING_ASCII_US).getStr() );
386 exit( -1 );
388 (*elem)[ language ]=pElement;
392 XMLFile::XMLFile( const XMLFile& obj )
393 /*****************************************************************************/
394 : XMLParentNode( obj ),
395 sFileName ( obj.sFileName ),
396 ID ( "id" ),
397 OLDREF ( "oldref" ),
398 XML_LANG ( "xml-lang" ),
399 XMLStrings ( NULL )
401 if( this!=&obj )
403 nodes_localize =obj.nodes_localize;
404 order =obj.order;
408 /*****************************************************************************/
409 XMLFile& XMLFile::operator=(const XMLFile& obj){
410 /*****************************************************************************/
411 if( this!=&obj ){
413 XMLParentNode::operator=(obj);
415 nodes_localize =obj.nodes_localize;
416 order =obj.order;
418 if( XMLStrings ) delete XMLStrings;
420 if( obj.XMLStrings )
422 XMLStrings = new XMLHashMap();
423 for( XMLHashMap::iterator pos = obj.XMLStrings->begin() ; pos != obj.XMLStrings->end() ; ++pos )
425 LangHashMap* elem=pos->second;
426 LangHashMap* newelem = new LangHashMap();
427 for(LangHashMap::iterator pos2=elem->begin(); pos2!=elem->end();++pos2){
428 (*newelem)[ pos2->first ] = new XMLElement( *pos2->second );
430 (*XMLStrings)[ pos->first ] = newelem;
434 return *this;
438 /*****************************************************************************/
439 void XMLFile::SearchL10NElements( XMLParentNode *pCur , int pos)
440 /*****************************************************************************/
442 static const OString LOCALIZE("localize");
443 static const OString THEID("id");
444 bool bInsert = true;
445 if ( !pCur )
446 SearchL10NElements( this );
447 else {
448 switch( pCur->GetNodeType()) {
449 case XML_NODE_TYPE_FILE: {
450 XMLParentNode* pElement;
451 if( GetChildList()){
452 for ( size_t i = 0; i < GetChildList()->size(); i++ ){
453 pElement = (XMLParentNode*) (*GetChildList())[ i ];
454 if( pElement->GetNodeType() == XML_NODE_TYPE_ELEMENT ) SearchL10NElements( pElement , i);
458 break;
459 case XML_NODE_TYPE_ELEMENT: {
460 XMLElement *pElement = ( XMLElement * ) pCur;
461 OString sName(OUStringToOString(pElement->GetName(), RTL_TEXTENCODING_ASCII_US).toAsciiLowerCase());
462 OString language,tmpStrVal,oldref;
463 if ( pElement->GetAttributeList())
465 for ( size_t j = 0 , cnt = pElement->GetAttributeList()->size(); j < cnt && bInsert; ++j )
467 const OString tmpStr = OUStringToOString((*pElement->GetAttributeList())[j]->GetName(), RTL_TEXTENCODING_UTF8);
468 if (tmpStr == THEID) { // Get the "id" Attribute
469 tmpStrVal=OUStringToOString( (*pElement->GetAttributeList())[ j ]->GetValue(),RTL_TEXTENCODING_UTF8 );
471 if (tmpStr == LOCALIZE) { // Get the "localize" Attribute
472 bInsert=false;
474 if (tmpStr == XML_LANG) { // Get the "xml-lang" Attribute
475 language=OUStringToOString( (*pElement->GetAttributeList())[ j ]->GetValue(),RTL_TEXTENCODING_UTF8 );
477 if (tmpStr == OLDREF) { // Get the "oldref" Attribute
478 oldref=OUStringToOString( (*pElement->GetAttributeList())[ j ]->GetValue(),RTL_TEXTENCODING_UTF8 );
481 pElement->SetLanguageId ( language );
482 pElement->SetId(tmpStrVal);
483 pElement->SetOldRef ( oldref );
484 pElement->SetPos( pos );
487 if ( bInsert && ( nodes_localize.find( sName ) != nodes_localize.end() ) )
488 InsertL10NElement(pElement);
489 else if ( bInsert && pElement->GetChildList() ){
490 for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ )
491 SearchL10NElements( (XMLParentNode*)(*pElement->GetChildList())[ k ], k);
494 break;
495 case XML_NODE_TYPE_DATA: {
497 break;
498 case XML_NODE_TYPE_COMMENT: {
500 break;
501 case XML_NODE_TYPE_DEFAULT: {
503 break;
508 /*****************************************************************************/
509 bool XMLFile::CheckExportStatus( XMLParentNode *pCur )
510 /*****************************************************************************/
512 static bool bStatusExport = true;
513 const OString STATUS(RTL_CONSTASCII_STRINGPARAM("status"));
514 const OString PUBLISH(RTL_CONSTASCII_STRINGPARAM("PUBLISH"));
515 const OString DEPRECATED(RTL_CONSTASCII_STRINGPARAM("DEPRECATED"));
516 const OString TOPIC(RTL_CONSTASCII_STRINGPARAM("topic"));
518 bool bInsert = true;
519 if ( !pCur )
520 CheckExportStatus( this );
521 else {
522 switch( pCur->GetNodeType()) {
523 case XML_NODE_TYPE_FILE: {
524 XMLParentNode* pElement;
525 if( GetChildList()){
526 for ( size_t i = 0; i < GetChildList()->size(); i++ ){
527 pElement = (XMLParentNode*)(*GetChildList())[ i ];
528 if( pElement->GetNodeType() == XML_NODE_TYPE_ELEMENT ) CheckExportStatus( pElement );//, i);
532 break;
533 case XML_NODE_TYPE_ELEMENT: {
534 XMLElement *pElement = ( XMLElement * ) pCur;
535 OString sName(OUStringToOString(pElement->GetName(), RTL_TEXTENCODING_ASCII_US));
536 if (sName.equalsIgnoreAsciiCase(TOPIC))
538 if ( pElement->GetAttributeList())
540 for (size_t j = 0 , cnt = pElement->GetAttributeList()->size(); j < cnt && bInsert; ++j)
542 const OString tmpStr(OUStringToOString((*pElement->GetAttributeList())[j]->GetName(),
543 RTL_TEXTENCODING_UTF8));
544 if (tmpStr.equalsIgnoreAsciiCase(STATUS))
546 OString tmpStrVal(OUStringToOString( (*pElement->GetAttributeList())[j]->GetValue(),
547 RTL_TEXTENCODING_UTF8));
548 if (!tmpStrVal.equalsIgnoreAsciiCase(PUBLISH) &&
549 !tmpStrVal.equalsIgnoreAsciiCase(DEPRECATED))
551 bStatusExport = false;
558 else if ( pElement->GetChildList() )
560 for (size_t k = 0; k < pElement->GetChildList()->size(); ++k)
561 CheckExportStatus( (XMLParentNode*)(*pElement->GetChildList())[k] );
564 break;
567 return bStatusExport;
570 /*****************************************************************************/
571 sal_uInt16 XMLElement::GetNodeType()
572 /*****************************************************************************/
574 return XML_NODE_TYPE_ELEMENT;
577 /*****************************************************************************/
578 XMLElement::XMLElement(const XMLElement& obj)
579 /*****************************************************************************/
581 XMLParentNode ( obj ),
582 sElementName ( obj.sElementName ),
583 pAttributes ( NULL ),
584 project ( obj.project ),
585 filename ( obj.filename ),
586 id ( obj.id ),
587 sOldRef ( obj.sOldRef ),
588 resourceType ( obj.resourceType ),
589 languageId ( obj.languageId ),
590 nPos ( obj.nPos )
593 if ( obj.pAttributes ){
594 pAttributes = new XMLAttributeList();
595 for ( size_t i = 0; i < obj.pAttributes->size(); i++ )
596 AddAttribute( (*obj.pAttributes)[ i ]->GetName(), (*obj.pAttributes)[ i ]->GetValue() );
600 /*****************************************************************************/
601 XMLElement& XMLElement::operator=(const XMLElement& obj){
602 /*****************************************************************************/
603 if( this!=&obj ){
604 XMLParentNode::operator=(obj);
605 sElementName =obj.sElementName;
606 project =obj.project;
607 filename =obj.filename;
608 id =obj.id;
609 sOldRef =obj.sOldRef;
610 resourceType =obj.resourceType;
611 languageId =obj.languageId;
612 nPos =obj.nPos;
614 if ( pAttributes ){
615 for ( size_t i = 0; i < pAttributes->size(); i++ )
616 delete (*pAttributes)[ i ];
617 delete pAttributes;
619 if ( obj.pAttributes ){
620 pAttributes =new XMLAttributeList();
621 for ( size_t i = 0; i < obj.pAttributes->size(); i++ )
622 AddAttribute( (*obj.pAttributes)[ i ]->GetName(), (*obj.pAttributes)[ i ]->GetValue() );
625 return *this;
628 /*****************************************************************************/
629 void XMLElement::AddAttribute( const OUString &rAttribute, const OUString &rValue )
630 /*****************************************************************************/
632 if ( !pAttributes )
633 pAttributes = new XMLAttributeList();
634 pAttributes->push_back( new XMLAttribute( rAttribute, rValue ) );
637 /*****************************************************************************/
638 void XMLElement::ChangeLanguageTag( const OUString &rValue )
640 SetLanguageId(OUStringToOString(rValue, RTL_TEXTENCODING_UTF8));
641 if ( pAttributes )
643 for (size_t i = 0; i < pAttributes->size(); ++i)
645 if ( (*pAttributes)[ i ]->GetName() == "xml-lang" )
646 (*pAttributes)[ i ]->setValue(rValue);
649 XMLChildNode* pNode = NULL;
650 XMLElement* pElem = NULL;
651 XMLChildNodeList* pCList = GetChildList();
653 if( pCList != NULL )
655 for ( size_t i = 0; i < pCList->size(); i++ )
657 pNode = (*pCList)[ i ];
658 if( pNode != NULL && pNode->GetNodeType() == XML_NODE_TYPE_ELEMENT )
660 pElem = static_cast< XMLElement* >(pNode);
661 pElem->ChangeLanguageTag( rValue );
662 pElem->SetLanguageId(OUStringToOString(rValue, RTL_TEXTENCODING_UTF8));
663 pElem = NULL;
664 pNode = NULL;
667 pCList = NULL;
671 /*****************************************************************************/
672 XMLElement::~XMLElement()
673 /*****************************************************************************/
675 if ( pAttributes ) {
676 for ( size_t i = 0; i < pAttributes->size(); i++ )
677 delete (*pAttributes)[ i ];
679 delete pAttributes;
680 pAttributes = NULL;
684 /*****************************************************************************/
685 OUString XMLElement::ToOUString(){
686 /*****************************************************************************/
687 OUStringBuffer* buffer = new OUStringBuffer();
688 Print(this,*buffer,true);
689 OUString result=buffer->makeStringAndClear();
690 OUString xy(result.getStr());
691 result=OUString(xy);
692 delete buffer;
693 return result;
695 /*****************************************************************************/
696 void XMLElement::Print(XMLNode *pCur, OUStringBuffer& buffer , bool rootelement ){
697 /*****************************************************************************/
698 static const OUString XML_LANG ( "xml-lang" );
700 if(pCur!=NULL){
701 if(rootelement){
702 XMLElement *pElement = ( XMLElement * ) pCur;
703 if ( pElement->GetAttributeList()){
704 if ( pElement->GetChildList()){
705 XMLChildNode* tmp=NULL;
706 for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ ){
707 tmp = (*pElement->GetChildList())[ k ];
708 Print( tmp, buffer , false);
713 else{
715 switch( pCur->GetNodeType()) {
716 case XML_NODE_TYPE_ELEMENT: {
717 XMLElement *pElement = ( XMLElement * ) pCur;
719 if( !pElement->GetName().equalsIgnoreAsciiCase("comment") ){
720 buffer.append( OUString("<") );
721 buffer.append( pElement->GetName() );
722 if ( pElement->GetAttributeList()){
723 for ( size_t j = 0; j < pElement->GetAttributeList()->size(); j++ ){
725 OUString aAttrName( (*pElement->GetAttributeList())[ j ]->GetName() );
726 if( !aAttrName.equalsIgnoreAsciiCase( XML_LANG ) ) {
727 buffer.append( OUString(" ") );
728 buffer.append( aAttrName );
729 buffer.append( OUString("=") );
730 buffer.append( OUString("\"") );
731 buffer.append( (*pElement->GetAttributeList())[ j ]->GetValue() );
732 buffer.append( OUString("\"") );
736 if ( !pElement->GetChildList())
737 buffer.append( OUString("/>") );
738 else {
739 buffer.append( OUString(">") );
740 XMLChildNode* tmp=NULL;
741 for ( size_t k = 0; k < pElement->GetChildList()->size(); k++ ){
742 tmp = (*pElement->GetChildList())[ k ];
743 Print( tmp, buffer , false);
745 buffer.append( OUString("</") );
746 buffer.append( pElement->GetName() );
747 buffer.append( OUString(">") );
751 break;
752 case XML_NODE_TYPE_DATA: {
753 XMLData *pData = ( XMLData * ) pCur;
754 OUString sData = pData->GetData();
755 buffer.append( sData );
757 break;
758 case XML_NODE_TYPE_COMMENT: {
759 XMLComment *pComment = ( XMLComment * ) pCur;
760 buffer.append( OUString("<!--") );
761 buffer.append( pComment->GetComment() );
762 buffer.append( OUString("-->") );
764 break;
765 case XML_NODE_TYPE_DEFAULT: {
766 XMLDefault *pDefault = ( XMLDefault * ) pCur;
767 buffer.append( pDefault->GetDefault() );
769 break;
772 }else {
773 fprintf(stdout,"\n#+------Error: NULL Pointer in XMLELement::Print------+#\n");
774 return;
780 // class XMLData
782 /*****************************************************************************/
783 XMLData::XMLData(const XMLData& obj)
784 /*****************************************************************************/
785 : XMLChildNode( obj ),
786 sData( obj.sData ) ,
787 isNewCreated ( obj.isNewCreated ){}
789 /*****************************************************************************/
790 XMLData& XMLData::operator=(const XMLData& obj){
791 /*****************************************************************************/
792 if( this!=&obj ){
793 XMLChildNode::operator=( obj );
794 sData = obj.sData;
795 isNewCreated = obj.isNewCreated;
797 return *this;
799 /*****************************************************************************/
800 void XMLData::AddData( const OUString &rData) {
801 /*****************************************************************************/
802 sData += rData;
805 /*****************************************************************************/
806 sal_uInt16 XMLData::GetNodeType()
807 /*****************************************************************************/
809 return XML_NODE_TYPE_DATA;
813 // class XMLComment
816 /*****************************************************************************/
817 sal_uInt16 XMLComment::GetNodeType()
818 /*****************************************************************************/
820 return XML_NODE_TYPE_COMMENT;
822 /*****************************************************************************/
823 XMLComment::XMLComment(const XMLComment& obj)
824 /*****************************************************************************/
825 : XMLChildNode( obj ),
826 sComment( obj.sComment ){}
828 /*****************************************************************************/
829 XMLComment& XMLComment::operator=(const XMLComment& obj){
830 /*****************************************************************************/
831 if( this!=&obj ){
832 XMLChildNode::operator=( obj );
833 sComment = obj.sComment;
835 return *this;
839 // class XMLDefault
842 /*****************************************************************************/
843 sal_uInt16 XMLDefault::GetNodeType()
844 /*****************************************************************************/
846 return XML_NODE_TYPE_DEFAULT;
848 /*****************************************************************************/
849 XMLDefault::XMLDefault(const XMLDefault& obj)
850 /*****************************************************************************/
851 : XMLChildNode( obj ),
852 sDefault( obj.sDefault){}
854 /*****************************************************************************/
855 XMLDefault& XMLDefault::operator=(const XMLDefault& obj){
856 /*****************************************************************************/
857 if( this!=&obj ){
858 XMLChildNode::operator=( obj );
859 sDefault = obj.sDefault;
861 return *this;
866 // class SimpleXMLParser
// Convert UTF-8 parser text to OUString: the first form takes a
// NUL-terminated string, the second an explicit length n.
#define XML_CHAR_TO_OUSTRING(x) OStringToOUString(OString(x), RTL_TEXTENCODING_UTF8)
#define XML_CHAR_N_TO_OUSTRING(x,n) OStringToOUString(OString(x,n), RTL_TEXTENCODING_UTF8 )
872 namespace
875 static OUString lcl_pathnameToAbsoluteUrl(const OUString& rPathname) {
876 OUString sUrl;
877 if (osl::FileBase::getFileURLFromSystemPath(rPathname, sUrl)
878 != osl::FileBase::E_None)
880 std::cerr << "Error: Cannot convert input pathname to URL\n";
881 std::exit(EXIT_FAILURE);
883 OUString sCwd;
884 if (osl_getProcessWorkingDir(&sCwd.pData) != osl_Process_E_None) {
885 std::cerr << "Error: Cannot determine cwd\n";
886 std::exit(EXIT_FAILURE);
888 if (osl::FileBase::getAbsoluteFileURL(sCwd, sUrl, sUrl)
889 != osl::FileBase::E_None)
891 std::cerr << "Error: Cannot convert input URL to absolute URL\n";
892 std::exit(EXIT_FAILURE);
894 return sUrl;
899 /*****************************************************************************/
900 SimpleXMLParser::SimpleXMLParser()
901 /*****************************************************************************/
902 : pXMLFile( NULL )
904 aParser = XML_ParserCreate( NULL );
905 XML_SetUserData( aParser, this );
906 XML_SetElementHandler( aParser, (XML_StartElementHandler) StartElementHandler, (XML_EndElementHandler) EndElementHandler );
907 XML_SetCharacterDataHandler( aParser, (XML_CharacterDataHandler) CharacterDataHandler );
908 XML_SetCommentHandler( aParser, (XML_CommentHandler) CommentHandler );
909 XML_SetDefaultHandler( aParser, (XML_DefaultHandler) DefaultHandler );
912 /*****************************************************************************/
913 SimpleXMLParser::~SimpleXMLParser()
914 /*****************************************************************************/
916 XML_ParserFree( aParser );
919 /*****************************************************************************/
920 void SimpleXMLParser::StartElementHandler(
921 void *userData, const XML_Char *name, const XML_Char **atts )
922 /*****************************************************************************/
924 (( SimpleXMLParser * ) userData )->StartElement( name, atts );
928 /*****************************************************************************/
929 void SimpleXMLParser::EndElementHandler(
930 void *userData, const XML_Char *name )
931 /*****************************************************************************/
933 (( SimpleXMLParser * ) userData )->EndElement( name );
936 /*****************************************************************************/
937 void SimpleXMLParser::CharacterDataHandler(
938 void *userData, const XML_Char *s, int len )
939 /*****************************************************************************/
941 (( SimpleXMLParser * ) userData )->CharacterData( s, len );
944 /*****************************************************************************/
945 void SimpleXMLParser::CommentHandler(
946 void *userData, const XML_Char *data )
947 /*****************************************************************************/
949 (( SimpleXMLParser * ) userData )->Comment( data );
952 /*****************************************************************************/
953 void SimpleXMLParser::DefaultHandler(
954 void *userData, const XML_Char *s, int len )
955 /*****************************************************************************/
957 (( SimpleXMLParser * ) userData )->Default( s, len );
960 /*****************************************************************************/
961 void SimpleXMLParser::StartElement(
962 const XML_Char *name, const XML_Char **atts )
963 /*****************************************************************************/
965 OUString sElementName = OUString( XML_CHAR_TO_OUSTRING( name ));
966 XMLElement *pElement = new XMLElement( sElementName, ( XMLParentNode * ) pCurNode );
967 pCurNode = pElement;
968 pCurData = NULL;
970 int i = 0;
971 while( atts[i] ) {
972 pElement->AddAttribute(
973 OUString( XML_CHAR_TO_OUSTRING( atts[ i ] )),
974 OUString( XML_CHAR_TO_OUSTRING( atts[ i + 1 ] )));
975 i += 2;
979 /*****************************************************************************/
980 void SimpleXMLParser::EndElement( const XML_Char *name )
981 /*****************************************************************************/
983 // This variable is not used at all, but the sax C interface can't be changed
984 // To prevent warnings this dummy assignment is used
985 // +++
986 (void) name;
988 pCurNode = pCurNode->GetParent();
989 pCurData = NULL;
992 /*****************************************************************************/
993 void SimpleXMLParser::CharacterData(
994 const XML_Char *s, int len )
995 /*****************************************************************************/
997 if ( !pCurData ){
998 OUString x = XML_CHAR_N_TO_OUSTRING( s, len );
999 XMLUtil::UnQuotHTML(x);
1000 pCurData = new XMLData( x , pCurNode );
1001 }else{
1002 OUString x = XML_CHAR_N_TO_OUSTRING( s, len );
1003 XMLUtil::UnQuotHTML(x);
1004 pCurData->AddData( x );
1009 /*****************************************************************************/
1010 void SimpleXMLParser::Comment(
1011 const XML_Char *data )
1012 /*****************************************************************************/
1014 pCurData = NULL;
1015 new XMLComment( OUString( XML_CHAR_TO_OUSTRING( data )), pCurNode );
1018 /*****************************************************************************/
1019 void SimpleXMLParser::Default(
1020 const XML_Char *s, int len )
1021 /*****************************************************************************/
1023 pCurData = NULL;
1024 new XMLDefault(
1025 OUString( XML_CHAR_N_TO_OUSTRING( s, len )), pCurNode );
1028 /*****************************************************************************/
1029 XMLFile *SimpleXMLParser::Execute( const OUString &rFileName, XMLFile* pXMLFileIn )
1030 /*****************************************************************************/
1032 aErrorInformation.eCode = XML_ERROR_NONE;
1033 aErrorInformation.nLine = 0;
1034 aErrorInformation.nColumn = 0;
1035 aErrorInformation.sMessage = OUString( "ERROR: Unable to open file ");
1036 aErrorInformation.sMessage += rFileName;
1038 OUString aFileURL(lcl_pathnameToAbsoluteUrl(rFileName));
1040 oslFileHandle h;
1041 if (osl_openFile(aFileURL.pData, &h, osl_File_OpenFlag_Read)
1042 != osl_File_E_None)
1044 return 0;
1047 sal_uInt64 s;
1048 oslFileError e = osl_getFileSize(h, &s);
1049 void * p = NULL;
1050 if (e == osl_File_E_None) {
1051 e = osl_mapFile(h, &p, s, 0, 0);
1053 if (e != osl_File_E_None) {
1054 osl_closeFile(h);
1055 return 0;
1058 pXMLFile = pXMLFileIn;
1059 pXMLFile->SetName( rFileName );
1061 pCurNode = pXMLFile;
1062 pCurData = NULL;
1064 aErrorInformation.eCode = XML_ERROR_NONE;
1065 aErrorInformation.nLine = 0;
1066 aErrorInformation.nColumn = 0;
1067 if ( !pXMLFile->GetName().isEmpty()) {
1068 aErrorInformation.sMessage = OUString( "File ");
1069 aErrorInformation.sMessage += pXMLFile->GetName();
1070 aErrorInformation.sMessage += OUString( " parsed successfully");
1072 else
1073 aErrorInformation.sMessage = OUString( "XML-File parsed successfully");
1075 if (!XML_Parse(aParser, reinterpret_cast< char * >(p), s, true))
1077 aErrorInformation.eCode = XML_GetErrorCode( aParser );
1078 aErrorInformation.nLine = XML_GetErrorLineNumber( aParser );
1079 aErrorInformation.nColumn = XML_GetErrorColumnNumber( aParser );
1081 aErrorInformation.sMessage = OUString( "ERROR: ");
1082 if ( !pXMLFile->GetName().isEmpty())
1083 aErrorInformation.sMessage += pXMLFile->GetName();
1084 else
1085 aErrorInformation.sMessage += OUString( "XML-File (");
1086 aErrorInformation.sMessage += OUString::valueOf(
1087 sal::static_int_cast< sal_Int64 >(aErrorInformation.nLine));
1088 aErrorInformation.sMessage += OUString( ",");
1089 aErrorInformation.sMessage += OUString::valueOf(
1090 sal::static_int_cast< sal_Int64 >(aErrorInformation.nColumn));
1091 aErrorInformation.sMessage += OUString( "): ");
1093 switch (aErrorInformation.eCode) {
1094 case XML_ERROR_NO_MEMORY:
1095 aErrorInformation.sMessage += OUString( "No memory");
1096 break;
1097 case XML_ERROR_SYNTAX:
1098 aErrorInformation.sMessage += OUString( "Syntax");
1099 break;
1100 case XML_ERROR_NO_ELEMENTS:
1101 aErrorInformation.sMessage += OUString( "No elements");
1102 break;
1103 case XML_ERROR_INVALID_TOKEN:
1104 aErrorInformation.sMessage += OUString( "Invalid token");
1105 break;
1106 case XML_ERROR_UNCLOSED_TOKEN:
1107 aErrorInformation.sMessage += OUString( "Unclosed token");
1108 break;
1109 case XML_ERROR_PARTIAL_CHAR:
1110 aErrorInformation.sMessage += OUString( "Partial char");
1111 break;
1112 case XML_ERROR_TAG_MISMATCH:
1113 aErrorInformation.sMessage += OUString( "Tag mismatch");
1114 break;
1115 case XML_ERROR_DUPLICATE_ATTRIBUTE:
1116 aErrorInformation.sMessage += OUString( "Dublicat attribute");
1117 break;
1118 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT:
1119 aErrorInformation.sMessage += OUString( "Junk after doc element");
1120 break;
1121 case XML_ERROR_PARAM_ENTITY_REF:
1122 aErrorInformation.sMessage += OUString( "Param entity ref");
1123 break;
1124 case XML_ERROR_UNDEFINED_ENTITY:
1125 aErrorInformation.sMessage += OUString( "Undefined entity");
1126 break;
1127 case XML_ERROR_RECURSIVE_ENTITY_REF:
1128 aErrorInformation.sMessage += OUString( "Recursive entity ref");
1129 break;
1130 case XML_ERROR_ASYNC_ENTITY:
1131 aErrorInformation.sMessage += OUString( "Async_entity");
1132 break;
1133 case XML_ERROR_BAD_CHAR_REF:
1134 aErrorInformation.sMessage += OUString( "Bad char ref");
1135 break;
1136 case XML_ERROR_BINARY_ENTITY_REF:
1137 aErrorInformation.sMessage += OUString( "Binary entity");
1138 break;
1139 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF:
1140 aErrorInformation.sMessage += OUString( "Attribute external entity ref");
1141 break;
1142 case XML_ERROR_MISPLACED_XML_PI:
1143 aErrorInformation.sMessage += OUString( "Misplaced xml pi");
1144 break;
1145 case XML_ERROR_UNKNOWN_ENCODING:
1146 aErrorInformation.sMessage += OUString( "Unknown encoding");
1147 break;
1148 case XML_ERROR_INCORRECT_ENCODING:
1149 aErrorInformation.sMessage += OUString( "Incorrect encoding");
1150 break;
1151 case XML_ERROR_UNCLOSED_CDATA_SECTION:
1152 aErrorInformation.sMessage += OUString( "Unclosed cdata section");
1153 break;
1154 case XML_ERROR_EXTERNAL_ENTITY_HANDLING:
1155 aErrorInformation.sMessage += OUString( "External entity handling");
1156 break;
1157 case XML_ERROR_NOT_STANDALONE:
1158 aErrorInformation.sMessage += OUString( "Not standalone");
1159 break;
1160 case XML_ERROR_NONE:
1161 break;
1162 default:
1163 break;
1165 delete pXMLFile;
1166 pXMLFile = NULL;
1169 osl_unmapMappedFile(h, p, s);
1170 osl_closeFile(h);
1172 return pXMLFile;
1175 namespace
1178 static icu::UnicodeString lcl_QuotRange(
1179 const icu::UnicodeString& rString, const sal_Int32 nStart,
1180 const sal_Int32 nEnd, bool bInsideTag = false )
1182 icu::UnicodeString sReturn;
1183 assert( nStart < nEnd );
1184 assert( nStart >= 0 );
1185 assert( nEnd <= rString.length() );
1186 for (sal_Int32 i = nStart; i < nEnd; ++i)
1188 switch (rString[i])
1190 case '<':
1191 sReturn.append("&lt;");
1192 break;
1193 case '>':
1194 sReturn.append("&gt;");
1195 break;
1196 case '"':
1197 if( !bInsideTag )
1198 sReturn.append("&quot;");
1199 else
1200 sReturn.append(rString[i]);
1201 break;
1202 case '&':
1203 if (rString.startsWith("&amp;", i, 5))
1204 sReturn.append('&');
1205 else
1206 sReturn.append("&amp;");
1207 break;
1208 default:
1209 sReturn.append(rString[i]);
1210 break;
1213 return sReturn;
1216 static bool lcl_isTag( const icu::UnicodeString& rString )
1218 static const int nSize = 13;
1219 static const icu::UnicodeString vTags[nSize] = {
1220 "ahelp", "link", "item", "emph", "defaultinline",
1221 "switchinline", "caseinline", "variable",
1222 "bookmark_value", "image", "embedvar", "alt", "sup" };
1224 for( int nIndex = 0; nIndex < nSize; ++nIndex )
1226 if( rString.startsWith("<" + vTags[nIndex]) ||
1227 rString == "</" + vTags[nIndex] + ">" )
1228 return true;
1231 return rString == "<br/>" || rString =="<help-id-missing/>";
1234 } /// anonymous namespace
1236 OUString XMLUtil::QuotHTML( const OUString &rString )
1238 if( rString.trim().isEmpty() )
1239 return rString;
1240 UErrorCode nIcuErr = U_ZERO_ERROR;
1241 static const sal_uInt32 nSearchFlags =
1242 UREGEX_DOTALL | UREGEX_CASE_INSENSITIVE;
1243 static const OUString sPattern(
1244 "<[/]\?\?[a-z_-]+?(?:| +[a-z]+?=\".*?\") *[/]\?\?>");
1245 static const UnicodeString sSearchPat(
1246 reinterpret_cast<const UChar*>(sPattern.getStr()),
1247 sPattern.getLength() );
1249 icu::UnicodeString sSource(
1250 reinterpret_cast<const UChar*>(
1251 rString.getStr()), rString.getLength() );
1253 RegexMatcher aRegexMatcher( sSearchPat, nSearchFlags, nIcuErr );
1254 aRegexMatcher.reset( sSource );
1256 icu::UnicodeString sReturn;
1257 int32_t nEndPos = 0;
1258 int32_t nStartPos = 0;
1259 while( aRegexMatcher.find(nStartPos, nIcuErr) && nIcuErr == U_ZERO_ERROR )
1261 nStartPos = aRegexMatcher.start(nIcuErr);
1262 if ( nEndPos < nStartPos )
1263 sReturn.append(lcl_QuotRange(sSource, nEndPos, nStartPos));
1264 nEndPos = aRegexMatcher.end(nIcuErr);
1265 icu::UnicodeString sMatch = aRegexMatcher.group(nIcuErr);
1266 if( lcl_isTag(sMatch) )
1268 sReturn.append("<");
1269 sReturn.append(lcl_QuotRange(sSource, nStartPos+1, nEndPos-1, true));
1270 sReturn.append(">");
1272 else
1273 sReturn.append(lcl_QuotRange(sSource, nStartPos, nEndPos));
1274 ++nStartPos;
1276 if( nEndPos < sSource.length() )
1277 sReturn.append(lcl_QuotRange(sSource, nEndPos, sSource.length()));
1278 sReturn.append('\0');
1279 return OUString(reinterpret_cast<const sal_Unicode*>(sReturn.getBuffer()));
1282 OUString XMLUtil::UnQuotHTML( const OUString& rString )
1284 const OString sString(OUStringToOString(rString, RTL_TEXTENCODING_UTF8));
1285 return OStringToOUString(helper::UnQuotHTML(sString), RTL_TEXTENCODING_UTF8);
1288 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */