1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
19 #include <sal/config.h>
23 #include <string_view>
27 #include <xmlparse.hxx>
30 #include <osl/file.hxx>
31 #include <osl/process.h>
32 #include <o3tl/string_view.hxx>
33 #include <rtl/ustring.hxx>
34 #include <rtl/strbuf.hxx>
35 #include <unicode/regex.h>
39 constexpr OString XML_LANG
= "xml-lang"_ostr
;
44 XMLChildNode::XMLChildNode( XMLParentNode
*pPar
)
48 m_pParent
->AddChild( this );
52 XMLChildNode::XMLChildNode( const XMLChildNode
& rObj
)
54 m_pParent(rObj
.m_pParent
)
58 XMLChildNode
& XMLChildNode::operator=(const XMLChildNode
& rObj
)
62 m_pParent
=rObj
.m_pParent
;
70 XMLParentNode::~XMLParentNode()
74 RemoveAndDeleteAllChildren();
78 XMLParentNode::XMLParentNode( const XMLParentNode
& rObj
)
79 : XMLChildNode( rObj
)
81 if( !rObj
.m_pChildList
)
84 m_pChildList
.reset( new XMLChildNodeList
);
85 for ( size_t i
= 0; i
< rObj
.m_pChildList
->size(); i
++ )
87 XMLChildNode
* pNode
= (*rObj
.m_pChildList
)[ i
];
90 switch(pNode
->GetNodeType())
92 case XMLNodeType::ELEMENT
:
93 AddChild( new XMLElement( *static_cast<XMLElement
* >(pNode
) ) ); break;
94 case XMLNodeType::DATA
:
95 AddChild( new XMLData ( *static_cast<XMLData
* > (pNode
) ) ); break;
96 case XMLNodeType::COMMENT
:
97 AddChild( new XMLComment( *static_cast<XMLComment
* >(pNode
) ) ); break;
98 case XMLNodeType::DEFAULT
:
99 AddChild( new XMLDefault( *static_cast<XMLDefault
* >(pNode
) ) ); break;
100 default: fprintf(stdout
,"XMLParentNode::XMLParentNode( const XMLParentNode& rObj) strange obj");
106 XMLParentNode
& XMLParentNode::operator=(const XMLParentNode
& rObj
)
110 XMLChildNode::operator=(rObj
);
113 RemoveAndDeleteAllChildren();
115 if( rObj
.m_pChildList
)
117 m_pChildList
.reset( new XMLChildNodeList
);
118 for ( size_t i
= 0; i
< rObj
.m_pChildList
->size(); i
++ )
119 AddChild( (*rObj
.m_pChildList
)[ i
] );
122 m_pChildList
.reset();
127 void XMLParentNode::AddChild( XMLChildNode
*pChild
)
130 m_pChildList
.reset( new XMLChildNodeList
);
131 m_pChildList
->push_back( pChild
);
134 void XMLParentNode::RemoveAndDeleteAllChildren()
138 for ( size_t i
= 0; i
< m_pChildList
->size(); i
++ )
139 delete (*m_pChildList
)[ i
];
140 m_pChildList
->clear();
147 void XMLFile::Write( OString
const &aFilename
)
150 aFilename
.getStr(), std::ios_base::out
| std::ios_base::trunc
);
154 << "Error: helpex cannot create file " << aFilename
156 std::exit(EXIT_FAILURE
);
162 void XMLFile::Write( std::ofstream
&rStream
, XMLNode
*pCur
)
165 Write( rStream
, this );
167 switch( pCur
->GetNodeType())
169 case XMLNodeType::XFILE
:
172 for ( size_t i
= 0; i
< GetChildList()->size(); i
++ )
173 Write( rStream
, (*GetChildList())[ i
] );
176 case XMLNodeType::ELEMENT
:
178 XMLElement
*pElement
= static_cast<XMLElement
*>(pCur
);
180 rStream
<< pElement
->GetName();
181 if ( pElement
->GetAttributeList())
182 for ( size_t j
= 0; j
< pElement
->GetAttributeList()->size(); j
++ )
185 OString
sData( (*pElement
->GetAttributeList())[ j
]->GetName() );
186 rStream
<< XMLUtil::QuotHTML( sData
);
188 sData
= (*pElement
->GetAttributeList())[ j
]->GetValue();
189 rStream
<< XMLUtil::QuotHTML( sData
);
192 if ( !pElement
->GetChildList())
197 for ( size_t k
= 0; k
< pElement
->GetChildList()->size(); k
++ )
198 Write( rStream
, (*pElement
->GetChildList())[ k
] );
200 rStream
<< pElement
->GetName();
205 case XMLNodeType::DATA
:
207 OString
sData( static_cast<const XMLData
*>(pCur
)->GetData());
208 rStream
<< XMLUtil::QuotHTML( sData
);
211 case XMLNodeType::COMMENT
:
213 const XMLComment
*pComment
= static_cast<const XMLComment
*>(pCur
);
215 rStream
<< pComment
->GetComment();
219 case XMLNodeType::DEFAULT
:
221 const XMLDefault
*pDefault
= static_cast<const XMLDefault
*>(pCur
);
222 rStream
<< pDefault
->GetDefault();
229 void XMLFile::Print( XMLNode
*pCur
, sal_uInt16 nLevel
)
235 switch( pCur
->GetNodeType())
237 case XMLNodeType::XFILE
:
240 for ( size_t i
= 0; i
< GetChildList()->size(); i
++ )
241 Print( (*GetChildList())[ i
] );
244 case XMLNodeType::ELEMENT
:
246 XMLElement
*pElement
= static_cast<XMLElement
*>(pCur
);
248 fprintf( stdout
, "<%s", pElement
->GetName().getStr());
249 if ( pElement
->GetAttributeList())
251 for (size_t j
= 0; j
< pElement
->GetAttributeList()->size(); ++j
)
253 const OString
aAttrName((*pElement
->GetAttributeList())[j
]->GetName());
254 if (aAttrName
!= XML_LANG
)
256 fprintf( stdout
, " %s=\"%s\"",
258 (*pElement
->GetAttributeList())[ j
]->GetValue().getStr());
262 if ( !pElement
->GetChildList())
263 fprintf( stdout
, "/>" );
266 fprintf( stdout
, ">" );
267 for ( size_t k
= 0; k
< pElement
->GetChildList()->size(); k
++ )
268 Print( (*pElement
->GetChildList())[ k
], nLevel
+ 1 );
269 fprintf( stdout
, "</%s>", pElement
->GetName().getStr());
273 case XMLNodeType::DATA
:
275 const XMLData
*pData
= static_cast<const XMLData
*>(pCur
);
276 fprintf( stdout
, "%s", pData
->GetData().getStr());
279 case XMLNodeType::COMMENT
:
281 const XMLComment
*pComment
= static_cast<const XMLComment
*>(pCur
);
282 fprintf( stdout
, "<!--%s-->", pComment
->GetComment().getStr());
285 case XMLNodeType::DEFAULT
:
287 const XMLDefault
*pDefault
= static_cast<const XMLDefault
*>(pCur
);
288 fprintf( stdout
, "%s", pDefault
->GetDefault().getStr());
298 for (auto const& pos
: *m_pXMLStrings
)
300 delete pos
.second
; // Check and delete content also ?
305 XMLFile::XMLFile( OString _sFileName
) // the file name, empty if created from memory stream
306 : XMLParentNode( nullptr )
307 , m_sFileName(std::move( _sFileName
))
309 m_aNodes_localize
.emplace( "bookmark"_ostr
, true );
310 m_aNodes_localize
.emplace( "variable"_ostr
, true );
311 m_aNodes_localize
.emplace( "paragraph"_ostr
, true );
312 m_aNodes_localize
.emplace( "h1"_ostr
, true );
313 m_aNodes_localize
.emplace( "h2"_ostr
, true );
314 m_aNodes_localize
.emplace( "h3"_ostr
, true );
315 m_aNodes_localize
.emplace( "h4"_ostr
, true );
316 m_aNodes_localize
.emplace( "h5"_ostr
, true );
317 m_aNodes_localize
.emplace( "h6"_ostr
, true );
318 m_aNodes_localize
.emplace( "note"_ostr
, true );
319 m_aNodes_localize
.emplace( "tip"_ostr
, true );
320 m_aNodes_localize
.emplace( "warning"_ostr
, true );
321 m_aNodes_localize
.emplace( "alt"_ostr
, true );
322 m_aNodes_localize
.emplace( "caption"_ostr
, true );
323 m_aNodes_localize
.emplace( "title"_ostr
, true );
324 m_aNodes_localize
.emplace( "link"_ostr
, true );
327 void XMLFile::Extract()
329 m_pXMLStrings
.reset( new XMLHashMap
);
330 SearchL10NElements( this );
333 void XMLFile::InsertL10NElement( XMLElement
* pElement
)
335 OString sId
, sLanguage("en-US"_ostr
);
338 if( pElement
->GetAttributeList() != nullptr )
340 for ( size_t j
= 0; j
< pElement
->GetAttributeList()->size(); j
++ )
342 const OString
sTempStr((*pElement
->GetAttributeList())[ j
]->GetName());
343 // Get the "id" Attribute
344 if (sTempStr
== "id")
346 sId
= (*pElement
->GetAttributeList())[ j
]->GetValue();
348 // Get the "xml-lang" Attribute
349 if (sTempStr
== XML_LANG
)
351 sLanguage
= (*pElement
->GetAttributeList())[j
]->GetValue();
358 fprintf(stdout
,"XMLFile::InsertL10NElement: No AttributeList found");
359 fprintf(stdout
,"++++++++++++++++++++++++++++++++++++++++++++++++++");
361 fprintf(stdout
,"++++++++++++++++++++++++++++++++++++++++++++++++++");
364 XMLHashMap::iterator pos
= m_pXMLStrings
->find( sId
);
365 if( pos
== m_pXMLStrings
->end() ) // No instance, create new one
367 pElem
= new LangHashMap
;
368 (*pElem
)[ sLanguage
]=pElement
;
369 m_pXMLStrings
->emplace( sId
, pElem
);
370 m_vOrder
.push_back( sId
);
372 else // Already there
375 if ( pElem
->count(sLanguage
) )
377 fprintf(stdout
,"Error: Duplicated entry. ID = %s LANG = %s in File %s\n", sId
.getStr(), sLanguage
.getStr(), m_sFileName
.getStr() );
380 (*pElem
)[ sLanguage
]=pElement
;
384 XMLFile::XMLFile( const XMLFile
& rObj
)
385 : XMLParentNode( rObj
)
386 , m_sFileName( rObj
.m_sFileName
)
390 m_aNodes_localize
= rObj
.m_aNodes_localize
;
391 m_vOrder
= rObj
.m_vOrder
;
395 XMLFile
& XMLFile::operator=(const XMLFile
& rObj
)
400 XMLParentNode::operator=(rObj
);
402 m_aNodes_localize
= rObj
.m_aNodes_localize
;
403 m_vOrder
= rObj
.m_vOrder
;
405 m_pXMLStrings
.reset();
407 if( rObj
.m_pXMLStrings
)
409 m_pXMLStrings
.reset( new XMLHashMap
);
410 for (auto const& pos
: *rObj
.m_pXMLStrings
)
412 LangHashMap
* pElem
=pos
.second
;
413 LangHashMap
* pNewelem
= new LangHashMap
;
414 for (auto const& pos2
: *pElem
)
416 (*pNewelem
)[ pos2
.first
] = new XMLElement( *pos2
.second
);
418 (*m_pXMLStrings
)[ pos
.first
] = pNewelem
;
424 void XMLFile::SearchL10NElements( XMLChildNode
*pCur
)
427 SearchL10NElements( this );
430 switch( pCur
->GetNodeType())
432 case XMLNodeType::XFILE
:
436 for ( size_t i
= 0; i
< GetChildList()->size(); i
++ )
438 XMLChildNode
* pElement
= (*GetChildList())[ i
];
439 if( pElement
->GetNodeType() == XMLNodeType::ELEMENT
)
440 SearchL10NElements( pElement
);
445 case XMLNodeType::ELEMENT
:
448 XMLElement
*pElement
= static_cast<XMLElement
*>(pCur
);
449 const OString
sName(pElement
->GetName().toAsciiLowerCase());
450 if ( pElement
->GetAttributeList())
452 for ( size_t j
= 0 , cnt
= pElement
->GetAttributeList()->size(); j
< cnt
&& bInsert
; ++j
)
454 if ((*pElement
->GetAttributeList())[j
]->GetName() == "localize")
462 if ( bInsert
&& ( m_aNodes_localize
.find( sName
) != m_aNodes_localize
.end() ) )
463 InsertL10NElement(pElement
);
464 else if ( bInsert
&& pElement
->GetChildList() )
466 for ( size_t k
= 0; k
< pElement
->GetChildList()->size(); k
++ )
467 SearchL10NElements( (*pElement
->GetChildList())[ k
] );
477 bool XMLFile::CheckExportStatus( XMLChildNode
*pCur
)
479 static bool bStatusExport
= true;
482 CheckExportStatus( this );
484 switch( pCur
->GetNodeType())
486 case XMLNodeType::XFILE
:
490 for ( size_t i
= 0; i
< GetChildList()->size(); i
++ )
492 XMLChildNode
* pElement
= (*GetChildList())[ i
];
493 if( pElement
->GetNodeType() == XMLNodeType::ELEMENT
) CheckExportStatus( pElement
);//, i);
498 case XMLNodeType::ELEMENT
:
500 XMLElement
*pElement
= static_cast<XMLElement
*>(pCur
);
501 if (pElement
->GetName().equalsIgnoreAsciiCase("TOPIC"))
503 if ( pElement
->GetAttributeList())
505 for (size_t j
= 0 , cnt
= pElement
->GetAttributeList()->size(); j
< cnt
; ++j
)
507 const OString
tmpStr((*pElement
->GetAttributeList())[j
]->GetName());
508 if (tmpStr
.equalsIgnoreAsciiCase("STATUS"))
510 const OString
tmpStrVal((*pElement
->GetAttributeList())[j
]->GetValue());
511 if (!tmpStrVal
.equalsIgnoreAsciiCase("PUBLISH") &&
512 !tmpStrVal
.equalsIgnoreAsciiCase("DEPRECATED"))
514 bStatusExport
= false;
521 else if ( pElement
->GetChildList() )
523 for (size_t k
= 0; k
< pElement
->GetChildList()->size(); ++k
)
524 CheckExportStatus( (*pElement
->GetChildList())[k
] );
532 return bStatusExport
;
535 XMLElement::XMLElement(
536 OString _sName
, // the element name
537 XMLParentNode
*pParent
// parent node of this element
539 : XMLParentNode( pParent
)
540 , m_sElementName(std::move( _sName
))
544 XMLElement::XMLElement(const XMLElement
& rObj
)
545 : XMLParentNode( rObj
)
546 , m_sElementName( rObj
.m_sElementName
)
548 if ( rObj
.m_pAttributes
)
550 m_pAttributes
.reset( new XMLAttributeList
);
551 for ( size_t i
= 0; i
< rObj
.m_pAttributes
->size(); i
++ )
552 AddAttribute( (*rObj
.m_pAttributes
)[ i
]->GetName(), (*rObj
.m_pAttributes
)[ i
]->GetValue() );
556 XMLElement
& XMLElement::operator=(const XMLElement
& rObj
)
560 XMLParentNode::operator=(rObj
);
561 m_sElementName
= rObj
.m_sElementName
;
565 for ( size_t i
= 0; i
< m_pAttributes
->size(); i
++ )
566 delete (*m_pAttributes
)[ i
];
567 m_pAttributes
.reset();
569 if ( rObj
.m_pAttributes
)
571 m_pAttributes
.reset( new XMLAttributeList
);
572 for ( size_t i
= 0; i
< rObj
.m_pAttributes
->size(); i
++ )
573 AddAttribute( (*rObj
.m_pAttributes
)[ i
]->GetName(), (*rObj
.m_pAttributes
)[ i
]->GetValue() );
579 void XMLElement::AddAttribute( const OString
&rAttribute
, const OString
&rValue
)
581 if ( !m_pAttributes
)
582 m_pAttributes
.reset( new XMLAttributeList
);
583 m_pAttributes
->push_back( new XMLAttribute( rAttribute
, rValue
) );
586 void XMLElement::ChangeLanguageTag( const OString
&rValue
)
590 bool bWasSet
= false;
591 for (size_t i
= 0; i
< m_pAttributes
->size(); ++i
)
593 if ((*m_pAttributes
)[ i
]->GetName() == XML_LANG
)
595 (*m_pAttributes
)[ i
]->setValue(rValue
);
601 AddAttribute(XML_LANG
, rValue
);
603 XMLChildNodeList
* pCList
= GetChildList();
608 for ( size_t i
= 0; i
< pCList
->size(); i
++ )
610 XMLChildNode
* pNode
= (*pCList
)[ i
];
611 if( pNode
&& pNode
->GetNodeType() == XMLNodeType::ELEMENT
)
613 XMLElement
* pElem
= static_cast< XMLElement
* >(pNode
);
614 pElem
->ChangeLanguageTag( rValue
);
622 XMLElement::~XMLElement()
626 for ( size_t i
= 0; i
< m_pAttributes
->size(); i
++ )
627 delete (*m_pAttributes
)[ i
];
631 OString
XMLElement::ToOString()
633 OStringBuffer sBuffer
;
634 Print(this,sBuffer
,true);
635 return sBuffer
.makeStringAndClear();
638 void XMLElement::Print(XMLNode
*pCur
, OStringBuffer
& rBuffer
, bool bRootelement
) const
644 XMLElement
*pElement
= static_cast<XMLElement
*>(pCur
);
645 if ( pElement
->GetAttributeList())
647 if ( pElement
->GetChildList())
649 for ( size_t k
= 0; k
< pElement
->GetChildList()->size(); k
++ )
651 XMLChildNode
* pTmp
= (*pElement
->GetChildList())[ k
];
652 Print( pTmp
, rBuffer
, false);
659 switch( pCur
->GetNodeType())
661 case XMLNodeType::ELEMENT
:
663 XMLElement
*pElement
= static_cast<XMLElement
*>(pCur
);
665 if( !pElement
->GetName().equalsIgnoreAsciiCase("comment") )
667 rBuffer
.append( "<" );
668 rBuffer
.append( pElement
->GetName() );
669 if ( pElement
->GetAttributeList())
671 for ( size_t j
= 0; j
< pElement
->GetAttributeList()->size(); j
++ )
673 const OString
aAttrName( (*pElement
->GetAttributeList())[ j
]->GetName() );
674 if (aAttrName
!= XML_LANG
)
677 " " + aAttrName
+ "=\"" +
678 (*pElement
->GetAttributeList())[ j
]->GetValue() + "\"" );
682 if ( !pElement
->GetChildList())
683 rBuffer
.append( "/>" );
686 rBuffer
.append( ">" );
687 for ( size_t k
= 0; k
< pElement
->GetChildList()->size(); k
++ )
689 XMLChildNode
* pTmp
= (*pElement
->GetChildList())[ k
];
690 Print( pTmp
, rBuffer
, false);
692 rBuffer
.append( "</" + pElement
->GetName() + ">" );
697 case XMLNodeType::DATA
:
699 const XMLData
*pData
= static_cast<const XMLData
*>(pCur
);
700 rBuffer
.append( pData
->GetData() );
703 case XMLNodeType::COMMENT
:
705 const XMLComment
*pComment
= static_cast<const XMLComment
*>(pCur
);
706 rBuffer
.append( "<!--" + pComment
->GetComment() + "-->" );
709 case XMLNodeType::DEFAULT
:
711 const XMLDefault
*pDefault
= static_cast<const XMLDefault
*>(pCur
);
712 rBuffer
.append( pDefault
->GetDefault() );
722 fprintf(stdout
,"\n#+------Error: NULL Pointer in XMLELement::Print------+#\n");
733 OUString
lcl_pathnameToAbsoluteUrl(std::string_view rPathname
)
735 OUString sPath
= OStringToOUString(rPathname
, RTL_TEXTENCODING_UTF8
);
737 if (osl::FileBase::getFileURLFromSystemPath(sPath
, sUrl
)
738 != osl::FileBase::E_None
)
740 std::cerr
<< "Error: Cannot convert input pathname to URL\n";
741 std::exit(EXIT_FAILURE
);
744 if (osl_getProcessWorkingDir(&sCwd
.pData
) != osl_Process_E_None
)
746 std::cerr
<< "Error: Cannot determine cwd\n";
747 std::exit(EXIT_FAILURE
);
749 if (osl::FileBase::getAbsoluteFileURL(sCwd
, sUrl
, sUrl
)
750 != osl::FileBase::E_None
)
752 std::cerr
<< "Error: Cannot convert input URL to absolute URL\n";
753 std::exit(EXIT_FAILURE
);
760 SimpleXMLParser::SimpleXMLParser()
761 : m_pCurNode(nullptr)
762 , m_pCurData(nullptr)
764 m_aParser
= XML_ParserCreate( nullptr );
765 XML_SetUserData( m_aParser
, this );
766 XML_SetElementHandler( m_aParser
, StartElementHandler
, EndElementHandler
);
767 XML_SetCharacterDataHandler( m_aParser
, CharacterDataHandler
);
768 XML_SetCommentHandler( m_aParser
, CommentHandler
);
769 XML_SetDefaultHandler( m_aParser
, DefaultHandler
);
772 SimpleXMLParser::~SimpleXMLParser()
774 XML_ParserFree( m_aParser
);
777 void SimpleXMLParser::StartElementHandler(
778 void *userData
, const XML_Char
*name
, const XML_Char
**atts
)
780 static_cast<SimpleXMLParser
*>(userData
)->StartElement( name
, atts
);
783 void SimpleXMLParser::EndElementHandler(
784 void *userData
, const XML_Char
* /*name*/ )
786 static_cast<SimpleXMLParser
*>(userData
)->EndElement();
789 void SimpleXMLParser::CharacterDataHandler(
790 void *userData
, const XML_Char
*s
, int len
)
792 static_cast<SimpleXMLParser
*>(userData
)->CharacterData( s
, len
);
795 void SimpleXMLParser::CommentHandler(
796 void *userData
, const XML_Char
*data
)
798 static_cast<SimpleXMLParser
*>(userData
)->Comment( data
);
801 void SimpleXMLParser::DefaultHandler(
802 void *userData
, const XML_Char
*s
, int len
)
804 static_cast<SimpleXMLParser
*>(userData
)->Default( s
, len
);
807 void SimpleXMLParser::StartElement(
808 const XML_Char
*name
, const XML_Char
**atts
)
810 XMLElement
*pElement
= new XMLElement( OString(name
), m_pCurNode
);
811 m_pCurNode
= pElement
;
812 m_pCurData
= nullptr;
817 pElement
->AddAttribute( atts
[ i
], atts
[ i
+ 1 ] );
822 void SimpleXMLParser::EndElement()
824 m_pCurNode
= m_pCurNode
->GetParent();
825 m_pCurData
= nullptr;
828 void SimpleXMLParser::CharacterData( const XML_Char
*s
, int len
)
833 m_pCurData
= new XMLData( helper::UnQuotHTML(x
) , m_pCurNode
);
838 m_pCurData
->AddData( helper::UnQuotHTML(x
) );
843 void SimpleXMLParser::Comment( const XML_Char
*data
)
845 m_pCurData
= nullptr;
846 new XMLComment( OString( data
), m_pCurNode
);
849 void SimpleXMLParser::Default( const XML_Char
*s
, int len
)
851 m_pCurData
= nullptr;
852 new XMLDefault(OString( s
, len
), m_pCurNode
);
855 bool SimpleXMLParser::Execute( const OString
&rFileName
, XMLFile
* pXMLFile
)
857 m_aErrorInformation
.m_eCode
= XML_ERROR_NONE
;
858 m_aErrorInformation
.m_nLine
= 0;
859 m_aErrorInformation
.m_nColumn
= 0;
860 m_aErrorInformation
.m_sMessage
= "ERROR: Unable to open file "_ostr
;
861 m_aErrorInformation
.m_sMessage
+= rFileName
;
863 OUString
aFileURL(lcl_pathnameToAbsoluteUrl(rFileName
));
866 if (osl_openFile(aFileURL
.pData
, &h
, osl_File_OpenFlag_Read
)
873 oslFileError e
= osl_getFileSize(h
, &s
);
875 if (e
== osl_File_E_None
)
877 e
= osl_mapFile(h
, &p
, s
, 0, 0);
879 if (e
!= osl_File_E_None
)
885 pXMLFile
->SetName( rFileName
);
887 m_pCurNode
= pXMLFile
;
888 m_pCurData
= nullptr;
890 m_aErrorInformation
.m_eCode
= XML_ERROR_NONE
;
891 m_aErrorInformation
.m_nLine
= 0;
892 m_aErrorInformation
.m_nColumn
= 0;
893 if ( !pXMLFile
->GetName().isEmpty())
895 m_aErrorInformation
.m_sMessage
= "File " + pXMLFile
->GetName() + " parsed successfully";
898 m_aErrorInformation
.m_sMessage
= "XML-File parsed successfully"_ostr
;
900 bool result
= XML_Parse(m_aParser
, static_cast< char * >(p
), s
, true);
903 m_aErrorInformation
.m_eCode
= XML_GetErrorCode( m_aParser
);
904 m_aErrorInformation
.m_nLine
= XML_GetErrorLineNumber( m_aParser
);
905 m_aErrorInformation
.m_nColumn
= XML_GetErrorColumnNumber( m_aParser
);
907 m_aErrorInformation
.m_sMessage
= "ERROR: "_ostr
;
908 if ( !pXMLFile
->GetName().isEmpty())
909 m_aErrorInformation
.m_sMessage
+= pXMLFile
->GetName();
911 m_aErrorInformation
.m_sMessage
+= "XML-File (";
913 m_aErrorInformation
.m_sMessage
+=
914 OString::number(sal::static_int_cast
< sal_Int64
>(m_aErrorInformation
.m_nLine
)) + "," +
915 OString::number(sal::static_int_cast
< sal_Int64
>(m_aErrorInformation
.m_nColumn
)) + "): ";
917 switch (m_aErrorInformation
.m_eCode
)
919 case XML_ERROR_NO_MEMORY
:
920 m_aErrorInformation
.m_sMessage
+= "No memory";
922 case XML_ERROR_SYNTAX
:
923 m_aErrorInformation
.m_sMessage
+= "Syntax";
925 case XML_ERROR_NO_ELEMENTS
:
926 m_aErrorInformation
.m_sMessage
+= "No elements";
928 case XML_ERROR_INVALID_TOKEN
:
929 m_aErrorInformation
.m_sMessage
+= "Invalid token";
931 case XML_ERROR_UNCLOSED_TOKEN
:
932 m_aErrorInformation
.m_sMessage
+= "Unclosed token";
934 case XML_ERROR_PARTIAL_CHAR
:
935 m_aErrorInformation
.m_sMessage
+= "Partial char";
937 case XML_ERROR_TAG_MISMATCH
:
938 m_aErrorInformation
.m_sMessage
+= "Tag mismatch";
940 case XML_ERROR_DUPLICATE_ATTRIBUTE
:
941 m_aErrorInformation
.m_sMessage
+= "Duplicated attribute";
943 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT
:
944 m_aErrorInformation
.m_sMessage
+= "Junk after doc element";
946 case XML_ERROR_PARAM_ENTITY_REF
:
947 m_aErrorInformation
.m_sMessage
+= "Param entity ref";
949 case XML_ERROR_UNDEFINED_ENTITY
:
950 m_aErrorInformation
.m_sMessage
+= "Undefined entity";
952 case XML_ERROR_RECURSIVE_ENTITY_REF
:
953 m_aErrorInformation
.m_sMessage
+= "Recursive entity ref";
955 case XML_ERROR_ASYNC_ENTITY
:
956 m_aErrorInformation
.m_sMessage
+= "Async_entity";
958 case XML_ERROR_BAD_CHAR_REF
:
959 m_aErrorInformation
.m_sMessage
+= "Bad char ref";
961 case XML_ERROR_BINARY_ENTITY_REF
:
962 m_aErrorInformation
.m_sMessage
+= "Binary entity";
964 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF
:
965 m_aErrorInformation
.m_sMessage
+= "Attribute external entity ref";
967 case XML_ERROR_MISPLACED_XML_PI
:
968 m_aErrorInformation
.m_sMessage
+= "Misplaced xml pi";
970 case XML_ERROR_UNKNOWN_ENCODING
:
971 m_aErrorInformation
.m_sMessage
+= "Unknown encoding";
973 case XML_ERROR_INCORRECT_ENCODING
:
974 m_aErrorInformation
.m_sMessage
+= "Incorrect encoding";
976 case XML_ERROR_UNCLOSED_CDATA_SECTION
:
977 m_aErrorInformation
.m_sMessage
+= "Unclosed cdata section";
979 case XML_ERROR_EXTERNAL_ENTITY_HANDLING
:
980 m_aErrorInformation
.m_sMessage
+= "External entity handling";
982 case XML_ERROR_NOT_STANDALONE
:
983 m_aErrorInformation
.m_sMessage
+= "Not standalone";
992 osl_unmapMappedFile(h
, p
, s
);
1001 icu::UnicodeString
lcl_QuotRange(
1002 const icu::UnicodeString
& rString
, const sal_Int32 nStart
,
1003 const sal_Int32 nEnd
, bool bInsideTag
= false )
1005 icu::UnicodeString sReturn
;
1006 assert( nStart
< nEnd
);
1007 assert( nStart
>= 0 );
1008 assert( nEnd
<= rString
.length() );
1009 for (sal_Int32 i
= nStart
; i
< nEnd
; ++i
)
1014 sReturn
.append("<");
1017 sReturn
.append(">");
1021 sReturn
.append(""");
1023 sReturn
.append(rString
[i
]);
1026 if (rString
.startsWith("&", i
, 5))
1027 sReturn
.append('&');
1029 sReturn
.append("&");
1032 sReturn
.append(rString
[i
]);
1039 bool lcl_isTag( const icu::UnicodeString
& rString
)
1041 static const int nSize
= 20;
1042 static const icu::UnicodeString vTags
[nSize
] = {
1043 "ahelp", "link", "item", "emph", "defaultinline",
1044 "switchinline", "caseinline", "variable",
1045 "bookmark_value", "image", "object",
1046 "embedvar", "alt", "sup", "sub",
1047 "menuitem", "keycode", "input", "literal", "widget"
1050 for( int nIndex
= 0; nIndex
< nSize
; ++nIndex
)
1052 if( rString
.startsWith("<" + vTags
[nIndex
]) ||
1053 rString
== "</" + vTags
[nIndex
] + ">" )
1057 return rString
== "<br/>" || rString
=="<help-id-missing/>";
1060 } /// anonymous namespace
1062 OString
XMLUtil::QuotHTML( const OString
&rString
)
1064 if( o3tl::trim(rString
).empty() )
1066 UErrorCode nIcuErr
= U_ZERO_ERROR
;
1067 static const sal_uInt32 nSearchFlags
=
1068 UREGEX_DOTALL
| UREGEX_CASE_INSENSITIVE
;
1069 static const icu::UnicodeString
sSearchPat( "<[/]\?\?[a-z_-]+?(?:| +[a-z]+?=\".*?\") *[/]\?\?>" );
1071 const OUString sOUSource
= OStringToOUString(rString
, RTL_TEXTENCODING_UTF8
);
1072 icu::UnicodeString
sSource(
1073 reinterpret_cast<const UChar
*>(
1074 sOUSource
.getStr()), sOUSource
.getLength() );
1076 icu::RegexMatcher
aRegexMatcher( sSearchPat
, nSearchFlags
, nIcuErr
);
1077 aRegexMatcher
.reset( sSource
);
1079 icu::UnicodeString sReturn
;
1080 int32_t nEndPos
= 0;
1081 int32_t nStartPos
= 0;
1082 while( aRegexMatcher
.find(nStartPos
, nIcuErr
) && U_SUCCESS(nIcuErr
) )
1084 nStartPos
= aRegexMatcher
.start(nIcuErr
);
1085 if ( nEndPos
< nStartPos
)
1086 sReturn
.append(lcl_QuotRange(sSource
, nEndPos
, nStartPos
));
1087 nEndPos
= aRegexMatcher
.end(nIcuErr
);
1088 icu::UnicodeString sMatch
= aRegexMatcher
.group(nIcuErr
);
1089 if( lcl_isTag(sMatch
) )
1091 sReturn
.append("<");
1092 sReturn
.append(lcl_QuotRange(sSource
, nStartPos
+1, nEndPos
-1, true));
1093 sReturn
.append(">");
1096 sReturn
.append(lcl_QuotRange(sSource
, nStartPos
, nEndPos
));
1097 nStartPos
= nEndPos
;
1099 if( nEndPos
< sSource
.length() )
1100 sReturn
.append(lcl_QuotRange(sSource
, nEndPos
, sSource
.length()));
1101 sReturn
.append('\0');
1104 reinterpret_cast<const sal_Unicode
*>(sReturn
.getBuffer()),
1105 RTL_TEXTENCODING_UTF8
);
1108 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */