1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: xmlparse.hxx,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 #ifndef BOOTSTRP_XMLPARSE_HXX
32 #define BOOTSTRP_XMLPARSE_HXX
38 #include <external/expat/xmlparse.h>
40 #include <rtl/ustring.hxx>
41 #include <rtl/ustrbuf.hxx>
42 #include "tools/string.hxx"
43 #include "tools/list.hxx"
44 #define ENABLE_BYTESTRING_STREAM_OPERATORS
45 #include "tools/stream.hxx"
46 #include "tools/isofallback.hxx"
48 #include "xmlutil.hxx"
57 using namespace ::rtl
;
60 #include <hash_map> /* std::hashmap*/
61 #include <deque> /* std::deque*/
62 #include <iterator> /* std::iterator*/
63 #include <list> /* std::list*/
64 #include <vector> /* std::vector*/
// Numeric type codes, one per kind of XML node handled in this header.
65 #define XML_NODE_TYPE_FILE 0x001
66 #define XML_NODE_TYPE_ELEMENT 0x002
67 #define XML_NODE_TYPE_DATA 0x003
68 #define XML_NODE_TYPE_COMMENT 0x004
69 #define XML_NODE_TYPE_DEFAULT 0x005
// Number of slots in the isoArray language table declared later in this header.
70 #define MAX_LANGUAGES 99
73 //#define TESTDRIVER /* use xml2gsi testclass */
74 //-------------------------------------------------------------------------
76 /** Holds data of Attributes
78 class XMLAttribute
: public String
// The String base is initialised from the attribute name; the value is kept in sValue.
84 /// creates an attribute
86 const String
&rName
, // attribute name
87 const String
&rValue
// attribute value
89 : String( rName
), sValue( rValue
) {}
91 /// returns the value of this attribute
92 const String
&GetValue() { return sValue
; }
// replaces the stored value with rValue
94 void setValue(const String
&rValue
){sValue
=rValue
;}
96 /// returns true if two attributes are equal and have the same value
98 const XMLAttribute
&rAttribute
// the attribute which has to be equal
// compares the objects with == first, then their sValue members
101 return (( rAttribute
== *this ) && ( rAttribute
.sValue
== sValue
));
// Declares the list type XMLAttributeList with XMLAttribute* entries
// (DECLARE_LIST presumably comes from tools/list.hxx - TODO confirm).
105 DECLARE_LIST( XMLAttributeList
, XMLAttribute
* )
107 //-------------------------------------------------------------------------
109 /** Virtual base to handle different kinds of XML nodes
// Pure virtual: every concrete node class has to report its own type
// (presumably one of the XML_NODE_TYPE_* codes - TODO confirm in the implementation).
117 virtual USHORT
GetNodeType() = 0;
// Virtual destructor with an empty body.
118 virtual ~XMLNode() {}
121 //-------------------------------------------------------------------------
123 /** Virtual base to handle different kinds of child nodes
125 class XMLChildNode
: public XMLNode
// parent of this node; the default constructor below sets it to NULL
128 XMLParentNode
*pParent
;
// takes the parent node; only declared here, the definition is not in this header
131 XMLChildNode( XMLParentNode
*pPar
);
// default constructor: node without a parent
132 XMLChildNode():pParent( NULL
){};
// copy constructor and assignment operator are only declared in this header
133 XMLChildNode( const XMLChildNode
& obj
);
134 XMLChildNode
& operator=(const XMLChildNode
& obj
);
// redeclared as pure virtual; derived node classes provide the type
136 virtual USHORT
GetNodeType() = 0;
138 /// returns the parent of this node
139 XMLParentNode
*GetParent() { return pParent
; }
140 virtual ~XMLChildNode(){};
// Declares the list type XMLChildNodeList with XMLChildNode* entries;
// XMLParentNode keeps its children in a list of this type.
143 DECLARE_LIST( XMLChildNodeList
, XMLChildNode
* )
145 //-------------------------------------------------------------------------
147 /** Virtual base to handle different kinds of parent nodes
151 class XMLParentNode
: public XMLChildNode
// list of child nodes; both inline constructors below initialise it to NULL
154 XMLChildNodeList
*pChildList
;
// constructs a node below the given parent, with no child list yet
158 XMLParentNode( XMLParentNode
*pPar
)
159 : XMLChildNode( pPar
), pChildList( NULL
)
// default constructor: no child list yet
162 XMLParentNode(): pChildList(NULL
){
// copy constructor, assignment operator and destructor are only declared here
165 XMLParentNode( const XMLParentNode
& );
167 XMLParentNode
& operator=(const XMLParentNode
& obj
);
168 virtual ~XMLParentNode();
172 virtual USHORT
GetNodeType() = 0;
174 /// returns child list of this node (NULL right after construction with the inline constructors)
175 XMLChildNodeList
*GetChildList() { return pChildList
; }
179 XMLChildNode
*pChild
/// the new child
183 XMLChildNode
*pChild
, int pos
/// the new child
186 virtual int GetPosition( ByteString id
);
187 int RemoveChild( XMLElement
*pRefElement
);
188 void RemoveAndDeleteAllChilds();
190 /// returns a child element which matches the given one
191 XMLElement
*GetChildElement(
192 XMLElement
*pRefElement
// the reference element
196 //-------------------------------------------------------------------------
// Declares the list type XMLStringList with XMLElement* entries.
198 DECLARE_LIST( XMLStringList
, XMLElement
* )
// NOTE(review): hash_map is not part of standard C++; confirm the toolchain still provides it.
// hashByteString / equalByteString are not declared in this header
// (presumably they come from xmlutil.hxx - TODO confirm).
200 /// Mapping Language code <-> XML Element (the key type is ByteString)
201 typedef std::hash_map
< ByteString
,XMLElement
* , hashByteString
,equalByteString
> LangHashMap
;
203 /// Mapping XML Element string identifier <-> Language Map
204 typedef std::hash_map
<ByteString
, LangHashMap
* ,
205 hashByteString
,equalByteString
> XMLHashMap
;
207 /// Mapping iso alpha string code <-> iso numeric code
208 typedef std::hash_map
<ByteString
, int, hashByteString
,equalByteString
> HashMap
;
210 /// Mapping XML tag names <-> have localizable strings
211 typedef std::hash_map
<ByteString
, BOOL
,
212 hashByteString
,equalByteString
> TagMap
;
214 /** Holds information of a XML file, is root node of tree
218 class XMLFile
: public XMLParentNode
223 const String
&rFileName
// the file name, empty if created from memory stream
// copy constructor, only declared here
225 XMLFile( const XMLFile
& obj
) ;
228 ByteString
* GetGroupID(std::deque
<ByteString
> &groupid
);
// pCur defaults to NULL and nLevel to 0
229 void Print( XMLNode
*pCur
= NULL
, USHORT nLevel
= 0 );
230 virtual void SearchL10NElements( XMLParentNode
*pCur
, int pos
= 0 );
231 void Extract( XMLFile
*pCur
= NULL
);
233 // void static Signal_handler(int signo);//void*,oslSignalInfo * pInfo);
234 void showType(XMLParentNode
* node
);
// returns the XMLStrings pointer itself, not a copy of the map
236 XMLHashMap
* GetStrings(){return XMLStrings
;}
// two Write overloads: one takes a file name, one an already opened ofstream
237 BOOL
Write( ByteString
&rFilename
);
238 BOOL
Write( ofstream
&rStream
, XMLNode
*pCur
= NULL
);
240 bool CheckExportStatus( XMLParentNode
*pCur
= NULL
);// , int pos = 0 );
242 XMLFile
& operator=(const XMLFile
& obj
);
244 virtual USHORT
GetNodeType();
246 /// returns file name
247 const String
&GetName() { return sFileName
; }
// stores rFilename in sFileName
248 void SetName( const String
&rFilename
) { sFileName
= rFilename
; }
// stores rFullFilename in sFullName
249 void SetFullName( const String
&rFullFilename
) { sFullName
= rFullFilename
; }
// returns a copy of the order vector (returned by value)
250 const std::vector
<ByteString
> getOrder(){ return order
; }
253 // writes a string as UTF8 with dos line ends to a given stream
254 void WriteString( ofstream
&rStream
, const String
&sString
);
256 // quotes the given text for writing to a file
257 void QuotHTML( String
&rString
);
259 void InsertL10NElement( XMLElement
* pElement
);
// constant strings; presumably attribute names looked up while parsing - TODO confirm
265 const ByteString ID
,OLDREF
,XML_LANG
;
267 TagMap nodes_localize
;
// map handed out by GetStrings()
268 XMLHashMap
* XMLStrings
;
// vector handed out (as a copy) by getOrder()
270 std::vector
<ByteString
> order
;
273 /// A utility class for XML
274 /// See RFC 3066 / #i8252# for ISO codes
278 /// Quote the XML characters and replace \n \t
279 static void QuotHTML( String
&rString
);
281 /// Unquote the XML characters and restore \n \t
282 static void UnQuotHTML ( String
&rString
);
284 /// Return the numeric iso language code (declaration below is commented out)
285 //USHORT GetLangByIsoLang( const ByteString &rIsoLang );
287 /// Return the alpha string representation
288 ByteString
GetIsoLangByIndex( USHORT nIndex
);
// returns a reference to an XMLUtil object (presumably a singleton accessor - TODO confirm)
290 static XMLUtil
& Instance();
296 /// Mapping iso alpha string code <-> iso numeric code
299 /// Mapping iso numeric code <-> iso alpha string code
300 ByteString isoArray
[MAX_LANGUAGES
];
302 static void UnQuotData( String
&rString
);
303 static void UnQuotTags( String
&rString
);
// copy constructor, only declared in this header
306 XMLUtil(const XMLUtil
&);
312 //-------------------------------------------------------------------------
314 /** Hold information of an element node
316 class XMLElement
: public XMLParentNode
// attribute list handed out by GetAttributeList()
320 XMLAttributeList
*pAttributes
;
330 void Print(XMLNode
*pCur
, OUStringBuffer
& buffer
, bool rootelement
);
332 /// create an element node
335 const String
&rName
, // the element name
336 XMLParentNode
*Parent
// parent node of this element
337 ): XMLParentNode( Parent
),
338 sElementName( rName
),
// copy constructor and assignment operator are only declared here
350 XMLElement(const XMLElement
&);
352 XMLElement
& operator=(const XMLElement
& obj
);
353 /// returns the node type (presumably XML_NODE_TYPE_ELEMENT; the definition is not in this header)
354 virtual USHORT
GetNodeType();
356 /// returns element name
357 const String
&GetName() { return sElementName
; }
359 /// returns list of attributes of this element
360 XMLAttributeList
*GetAttributeList() { return pAttributes
; }
362 /// adds a new attribute to this element, typically used by parser
363 void AddAttribute( const String
&rAttribute
, const String
&rValue
);
365 void ChangeLanguageTag( const String
&rValue
);
366 // Return an ASCII String representation of this object
369 // Return a Unicode String representation of this object
370 OUString
ToOUString();
372 bool Equals(OUString refStr
);
374 /// returns an attribute
375 XMLAttribute
*GetAttribute(
376 const String
&rName
// the attribute name
// Plain setters: each one copies its by-value argument into the member of the same meaning.
378 void SetProject ( ByteString prj
){ project
= prj
; }
379 void SetFileName ( ByteString fn
){ filename
= fn
; }
380 void SetId ( ByteString theId
){ id
= theId
; }
381 void SetResourceType ( ByteString rt
){ resourceType
= rt
; }
382 void SetLanguageId ( ByteString lid
){ languageId
= lid
; }
383 void SetPos ( int nPos_in
){ nPos
= nPos_in
; }
384 void SetOldRef ( ByteString sOldRef_in
){ sOldRef
= sOldRef_in
; }
// Plain getters: each one returns the corresponding member by value; GetPos is virtual.
386 virtual int GetPos() { return nPos
; }
387 ByteString
GetProject() { return project
; }
388 ByteString
GetFileName() { return filename
; }
389 ByteString
GetId() { return id
; }
390 ByteString
GetOldref() { return sOldRef
; }
391 ByteString
GetResourceType(){ return resourceType
; }
392 ByteString
GetLanguageId() { return languageId
; }
396 //-------------------------------------------------------------------------
399 /** Holds character data
401 class XMLData
: public XMLChildNode
408 /// create a data node; isNewCreated is set to false
410 const String
&rData
, // the initial data
411 XMLParentNode
*Parent
// the parent node of this data, typically an element node
413 : XMLChildNode( Parent
), sData( rData
) , isNewCreated ( false ){}
// second constructor: same as above, but isNewCreated is taken from newCreated
415 const String
&rData
, // the initial data
416 XMLParentNode
*Parent
, // the parent node of this data, typically an element node
419 : XMLChildNode( Parent
), sData( rData
) , isNewCreated ( newCreated
){}
// copy constructor and assignment operator are only declared here
421 XMLData(const XMLData
& obj
);
423 XMLData
& operator=(const XMLData
& obj
);
424 virtual USHORT
GetNodeType();
// returns the stored character data
427 const String
&GetData() { return sData
; }
// returns the isNewCreated flag set by the constructors
429 bool isNew() { return isNewCreated
; }
430 /// adds new character data to the existing one
432 const String
&rData
// the new data
439 //-------------------------------------------------------------------------
443 class XMLComment
: public XMLChildNode
449 /// create a comment node
451 const String
&rComment
, // the comment
452 XMLParentNode
*Parent
// the parent node of this comment, typically an element node
454 : XMLChildNode( Parent
), sComment( rComment
) {}
456 virtual USHORT
GetNodeType();
// copy constructor and assignment operator are only declared here
458 XMLComment( const XMLComment
& obj
);
460 XMLComment
& operator=(const XMLComment
& obj
);
462 /// returns the comment
463 const String
&GetComment() { return sComment
; }
466 //-------------------------------------------------------------------------
468 /** Holds additional file content like those for which no handler exists
470 class XMLDefault
: public XMLChildNode
476 /// create a default-content node
478 const String
&rDefault
, // the default content
479 XMLParentNode
*Parent
// the parent node of this content, typically an element node
481 : XMLChildNode( Parent
), sDefault( rDefault
) {}
// copy constructor and assignment operator are only declared here
483 XMLDefault(const XMLDefault
& obj
);
485 XMLDefault
& operator=(const XMLDefault
& obj
);
487 /// returns the node type (presumably XML_NODE_TYPE_DEFAULT; the definition is not in this header)
488 virtual USHORT
GetNodeType();
490 /// returns the default content
491 const String
&GetDefault() { return sDefault
; }
494 //-------------------------------------------------------------------------
496 /** struct for error information, used by class SimpleXMLParser
// Fields of the error record returned by SimpleXMLParser::GetError().
// XML_Error is not declared in this header (presumably the expat error enum - TODO confirm).
499 XML_Error eCode
; // the error code
500 ULONG nLine
; // error line number
501 ULONG nColumn
; // error column number
502 String sMessage
; // readable error message
505 //-------------------------------------------------------------------------
507 /** validating xml parser, creates a document tree with xml nodes
511 class SimpleXMLParser
// error record handed out by GetError()
515 XMLError aErrorInformation
;
// presumably the node currently being filled while parsing - TODO confirm
518 XMLParentNode
*pCurNode
;
// Static callbacks taking an opaque userData pointer
// (presumably registered with expat, with userData carrying the parser object - TODO confirm).
522 static void StartElementHandler( void *userData
, const XML_Char
*name
, const XML_Char
**atts
);
523 static void EndElementHandler( void *userData
, const XML_Char
*name
);
524 static void CharacterDataHandler( void *userData
, const XML_Char
*s
, int len
);
525 static void CommentHandler( void *userData
, const XML_Char
*data
);
526 static void DefaultHandler( void *userData
, const XML_Char
*s
, int len
);
// Member counterparts of the static callbacks above, with the same arguments minus userData.
529 void StartElement( const XML_Char
*name
, const XML_Char
**atts
);
530 void EndElement( const XML_Char
*name
);
531 void CharacterData( const XML_Char
*s
, int len
);
532 void Comment( const XML_Char
*data
);
533 void Default( const XML_Char
*s
, int len
);
537 /// creates a new parser
541 /// parse a file, returns NULL on critical errors
543 const String
&rFullFileName
,
544 const String
&rFileName
, // the file name
545 XMLFile
*pXMLFileIn
// the XMLFile
548 /// parse a memory stream, returns NULL on critical errors
550 SvMemoryStream
*pStream
// the stream
553 /// returns an error struct
554 const XMLError
&GetError() { return aErrorInformation
; }