1 'encoding UTF-8 Do not remove or change this line!
2 '**************************************************************************
3 '* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 '* Copyright 2008 by Sun Microsystems, Inc.
7 '* OpenOffice.org - a multi-platform office productivity suite
9 '* $RCSfile: t_xml2.inc,v $
13 '* last change: $Author: jsi $ $Date: 2008-06-13 10:27:09 $
15 '* This file is part of OpenOffice.org.
17 '* OpenOffice.org is free software: you can redistribute it and/or modify
18 '* it under the terms of the GNU Lesser General Public License version 3
19 '* only, as published by the Free Software Foundation.
21 '* OpenOffice.org is distributed in the hope that it will be useful,
22 '* but WITHOUT ANY WARRANTY; without even the implied warranty of
23 '* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 '* GNU Lesser General Public License version 3 for more details
25 '* (a copy is included in the LICENSE file that accompanied this code).
27 '* You should have received a copy of the GNU Lesser General Public License
28 '* version 3 along with OpenOffice.org. If not, see
29 '* <http://www.openoffice.org/license.html>
30 '* for a copy of the LGPLv3 License.
32 '/************************************************************************
34 '* owner : helge.delfs@sun.com
36 '* short description : XML search routines II
38 '************************************************************************
40 ' #1 GetXMLValueLineExtra ' DEPRECATED depending on t_xml1.inc::GetXMLValueGlobal
41 ' #1 XMLWellFormed ' Checks the well formness of a XML file.
42 ' #1 GetXMLValue2 ' OBSOLETE: XML search routine (as TT has no SAX included we have used that rountine)
43 ' #1 GetBodiesItemStyleName ' DEPRECATED used by ../xml/level1/inc/sxw7_02.inc
44 ' #1 GetLineInXMLBody ' DEPRECATED used by ../xml/level1/inc/sxw7_02.inc and ../sxw7_03.inc
45 ' #1 GetItemStyleName ' DEPRECATED used by ../xml/level1/inc/sxw7_01.inc
46 ' #1 GetXMLElementPath ' Gets the elementpath in a DOM tree (mostly used for [automatic-]styles)
47 ' #1 fWhereIsXMLElementInBody ' Gets the elementpath in a DOM tree in the second level (mostly body elements)
49 '\***********************************************************************
51 function GetXMLValueLineExtra ( sXMLfile as String, sXMLsectionMaster as String, sXMLsection as String, sGroupTyp as String, sGroupName as String ) as String
52 '///+ Input:<ul><li>sXMLfile => Filename with full path</li>
53 '///+ <li>sXMLsectionMaster => The master-section (mostly in OpenOffice.org the filename without extension)</li>
54 '///+ <li>sXMLsection => Full way to the item</li>
55 '///+ <li>sGroupTyp => First entry after tag</li>
56 '///+ <li>sGroupName => Value of first entry</li></ul>
57 GetXMLValueLineExtra = GetXMLValueGlobal ( sXMLfile, sXMLsectionMaster, sXMLsection + " " + sGroupTyp + "=" + Chr(34) + sGroupName + Chr(34),,, TRUE )
60 '-------------------------------------------------------------------------
62 function GetXMLItemInstance ( sXMLfile as String, sXMLsectionMaster as String, sXMLsection as String ) as String
63 '/// Input:<ul><li>sXMLfile => Filename with full path</li>
64 '///+ <li>sXMLsectionMaster => The master-section (mostly in OpenOffice.org the filename without extension)</li>
65 '///+ <li>sXMLsection => Full way to the item</li></ul>
66 Dim sLine, sLine2 as String
67 Dim iStart, iEnd, i, iStr, iLen as Integer
69 sLine = GetXMLValueLine ( sXMLfile, sXMLsectionMaster, sXMLsection )
70 sLine2 = lcase ( sLine )
71 iStr = Instr ( sLine2, "instance" )
76 GetXMLItemInstance = "false"
81 if Mid ( sLine2, i, 1 ) = Chr(34) then iStart = i
83 if Mid ( sLine2, i, 1 ) = Chr(34) then
91 GetXMLItemInstance = ""
93 GetXMLItemInstance = Mid ( sLine, iStart+1, iEnd - iStart - 1 )
97 '-------------------------------------------------------------------------
99 function GetXMLItemEncoding ( sXMLfile as String, sXMLsectionMaster as String, sXMLsection as String ) as String
100 '/// Input: <ul><li>sXMLfile => Filename with full path</li>
101 '///+ <li>sXMLsectionMaster => The master-section (mostly in OpenOffice.org the filename without extension)</li>
102 '///+ <li>sXMLsection => Full way to the item</li></ul>
103 Dim sLine, sLine2 as String
104 Dim iStart, iEnd, i, iStr, iLen as Integer
106 sLine = GetXMLValueLine ( sXMLfile, sXMLsectionMaster, sXMLsection )
107 sLine2 = lcase ( sLine )
108 iStr = Instr ( sLine2, "encoding" )
109 iLen = len ( sLine2 )
113 GetXMLItemEncoding = "false"
118 if Mid ( sLine2, i, 1 ) = Chr(34) then iStart = i
120 if Mid ( sLine2, i, 1 ) = Chr(34) then
128 GetXMLItemEncoding = ""
130 GetXMLItemEncoding = Mid ( sLine, iStart+1, iEnd - iStart - 1 )
134 '-------------------------------------------------------------------------
136 function XMLWellFormed ( sFileName as String, optional bDebug as Boolean ) as Boolean
137 '/// Input: File name as string
138 '/// <i>(obsolete: Debug)</i>
139 '/// Return: TRUE or FALSE
140 Dim InputstreamXML as string
141 XMLWellFormed = FALSE
142 if IsMissing(bDebug) = FALSE then
143 warnlog "Debugmode 'XMLWellFormed' is obsolete. FUNCTION is now a native TestTool function!"
145 InputstreamXML = SAXCheckWellFormed(sFileName)
146 if InputstreamXML <> "" then
147 warnlog "Problem was found: " & InputstreamXML
149 printlog "File: '" & sFileName & "' is well formed."
154 '-------------------------------------------------------------------------
156 function GetXMLValue2 ( sXMLfile as String, sXMLsectionMaster as String, sXMLsection as String ) as String
157 '/// Input: <ul><li>sXMLfile => Filename with full path</li></ul>
158 Dim lsList(10) as String
159 Dim sInterim, sInterim2 as String
160 Dim i, ii, iLen, ibegin ,iEnd as Integer
162 sInterim = GetXMLValueLine ( sXMLfile, sXMLsectionMaster, sXMLsection,,, )
163 if sInterim = "" then
168 i = ExtractSections ( sXMLsection, lsList() )
169 sInterim2 = lsList(i)
170 iLen = len ( sInterim )
171 ii = len ( sInterim2 ) + 1
174 if mid( sInterim, i, 1 ) = ">" then
179 iEnd = ( iLen - ii - 1 ) - iBegin
180 GetXMLValue2 = Mid ( sInterim, iBegin, iEnd )
183 '-------------------------------------------------------------------------
185 function GetBodiesItemStyleName ( AXMLfile as string , WhichItem as string , HowOften as integer , OPTIONAL B ) as string
186 ' Author: Joerg Sievers
187 '/// With GetBodiesItemStyleName you can get the NAME of a STYLE from
188 '///+ a item in the BODY of a OpenOffice.org XML-file.
189 '/// <blockquote>OPTIONAL PARAMETER
190 '///+ If there are more than one "style-name" tags in ONE line, you
191 '///+ have to use an optional parameter.
192 '/// <i>see also</i>:<ul>
193 '///+ <li>GetXMLValueLineExtra</li></ul>
194 '/// <u>simple Example</u>:
195 '///+ String = GetBodiesStyleName ("example.sxc") , "table:table-row" , 2)
196 '///+ Return: The second STYLE-NAME of the 'table-row'-tag in the office:body
197 '/// <u>Example with optional parameter</u>:
198 '///+ XML-line you want to parse for the text:span style-name and it is the second
199 '///+ text:span-attribute in the office:body-section:
201 '///+ <text:p text:style-name="P1"><text:span text:style-name="T1">The first text</text:span></text:p>
202 '///+ <text:p text:style-name="P4"><text:span text:style-name="T4">Just a text</text:span></text:p>
204 '///+ then you have to use:
205 '///+ String = GetBodiesStyleName ("example.sxc") , "table:table-row" , 2 , 1)
206 '///+ The first ineteger (2) is for the second <text:span-entry in the file.
207 '///+ The OPTIONAL second integer is the 'ONE' AFTER the first tag in the same line.
208 Dim FileNum as integer
209 Dim XMLRawLine as string
210 Dim XMLCLearedLine as string
213 Dim FoundEntry as boolean
214 Dim DelLeft as integer
215 Dim ItemPosInString as integer
216 Dim XMLCLearedLineWithoutLeft as string
217 Dim DelRight as integer
218 Dim XMLCLearedAndSeperatedLine as string
220 if Dir ( AXMLfile ) = "" then
221 warnlog "GetBodiesItemStyleName(...) : '" & AXMLfile & "' is missing!"
225 WhichItem = "<" & WhichItem
232 Open AXMLfile For Input As #FileNum
234 Do until EOF(#FileNum) = TRUE OR FoundEntry = TRUE
236 Line input #FileNum, XMLRawLine
237 ' deleting tabs and spaces
238 XMLCLearedLine = TrimTab ( Trim ( XMLRawLine ) )
239 ' jumping to the office:body
240 if InStr(XMLCLearedLine , "<office:document-") <> 0 then
242 Do until EOF(#FileNum) = TRUE OR FoundEntry = TRUE
243 Line input #FileNum, XMLRawLine
244 XMLCLearedLine = TrimTab ( Trim ( XMLRawLine ) )
245 ' if the count of the item is the same as the one searching for...
246 ItemPosInString = InStr(XMLCLearedLine , WhichItem)
247 if ItemPosInString <> 0 then
248 ' warnlog "Debug: ItemPosInString = '" & ItemPosInString & "'"
249 ' Is there more than one time the 'WhichItem' in this line?
250 ' (MUST BE GIVEN BY OPTIONAL PARAMETER!)
251 if IsMissing(B) = FALSE then
253 DelLeft = InStr(XMLCLearedLine , "style-name=" & CHR$(34))
254 XMLCLearedLineWithoutLeft = Mid(XMLCLearedLine, DelLeft+12)
255 XMLCLearedLine = XMLCLearedLineWithoutLeft
260 ' searching for exakt attribute stylename="
261 DelLeft = InStr(XMLCLearedLine , "style-name=" & CHR$(34))
262 ' extrcting, stripping all things after the style-name-attribute (=12 chars)
263 XMLCLearedLineWithoutLeft = Mid(XMLCLearedLine, DelLeft+12)
264 ' extracting the real name without the "
265 DelRight = InStr(XMLCLearedLineWithoutLeft , CHR$(34))
267 XMLCLearedAndSeperatedLine = Mid(XMLCLearedLineWithoutLeft, 1 , DelRight-1)
268 GetBodiesItemStyleName = GetBodiesItemStyleName+XMLCLearedAndSeperatedLine
279 '-------------------------------------------------------------------------
281 function GetLineInXMLBody ( AXMLfile as string , WhichItem as string , HowOften as integer) as string
282 'Author: Joerg Sievers
283 '/// With this function you can extract a whole line in <office:body>
284 '///+ of a XML document. It is important to give this routine the
285 '///+ correct <ITEM and the count in <office:body>.
286 '/// <u>simple Example</u>:
287 '///+ We want to find the 2nd (!) table:table-row item
288 '///+ <table:table-row table:style-name="ro2" table:visibility="collapse">
289 '///+ String = GetLineInXMLBody(gOfficePath & ConvertPath("Content.xml") , "table:table-row" , 2)
290 '///+ Return: The whole line of the second 'table:table-row'-item.
291 Dim FileNum as integer
292 Dim XMLRawLine as string
293 Dim XMLCLearedLine as string
295 Dim FoundEntry as boolean
296 Dim ItemPosInString as integer
298 if Dir ( AXMLfile ) = "" then
299 warnlog "GetLineInXMLBody(...) : '" & AXMLfile & "' is missing!"
303 WhichItem = "<" & WhichItem
310 Open AXMLfile For Input As #FileNum
312 Do until EOF(#FileNum) = TRUE OR FoundEntry = TRUE
314 Line input #FileNum, XMLRawLine
315 ' deleting tabs and spaces
316 XMLCLearedLine = TrimTab ( Trim ( XMLRawLine ) )
317 ' jumping to the office:body
318 if InStr(XMLCLearedLine , "office:body") <> 0 then
319 Do until EOF(#FileNum) = TRUE OR FoundEntry = TRUE
320 Line input #FileNum, XMLRawLine
321 XMLCLearedLine = TrimTab ( Trim ( XMLRawLine ) )
322 ' if the count of the item is the same as the one searching for...
323 ItemPosInString = InStr(XMLCLearedLine , WhichItem)
324 if ItemPosInString <> 0 then
327 GetLineInXMLBody = XMLCLearedLine
328 ' printlog "Debug from 'GetLineInXMLBody'-function: '" & XMLCLearedLine & "'"
338 '-------------------------------------------------------------------------------
340 function GetItemStyleName ( sMainElement$ , sUsedElement$ , sElement$ , sCount as integer , sStyleName$ ) as string
341 'Author: Joerg Sievers
342 '/// With GetItemStyleName you can get the NAME of a STYLE in ANY
343 '///+ Element in a main element (like office:body-content) of a OpenOffice.org XML-file.
345 '///+ If the second parameter is the SAME as the third one, you are not going into the
346 '///+ third area. You will stay in the DOM in the second, where e.g. table:table element could
348 '/// <u>simple Example</u>:
349 '///+ String = GetItemStyleName ( "office:body", "table:table", "table:table-row" , 3 , "table:style-name" )
350 '///+ Return: The (attribute) name of the 3rd table-row node in a Writer document.
351 '///+ With this name you can search in the style-section for the correct values.
352 Dim InputstreamXML as integer
355 Dim xElementName as string
356 'Read the file and go to the main DOM node
358 ' If you need a debug mode, enable the printlog entries
359 ' -----------------------------------------------------
360 ' printlog " +-- function: GetItemStyleName ---------------------------"
361 ' printlog " | Main Node : " & SAXGetElementName
362 'Go to the main element (like office:body, office:script, office:automatic-styles, etc.)
363 SAXSeekElement(sMainElement$)
364 ' printlog " | Main Element : " & sMainElement$
365 if sUsedElement$ <> sElement$ then
366 SAXSeekElement(sUsedElement$)
367 ' printlog " | Used Element : " & sUsedElement$
369 InputstreamXML = SaxGetChildCount
370 ' printlog " | Count of Children : " & InputstreamXML
371 for i = 1 to InputstreamXML
373 if SAXGetNodeType = NodeTypeElement then
374 xElementName = SAXGetElementName
375 if xElementName = sElement$ then
377 ' printlog "("& i & " / " & a & ") Element:" & xElementName
379 GetItemStyleName = SAXGetAttributeValue(sStyleName$)
380 ' printlog " | Name of Element : " & GetItemStyleName
387 ' printlog " +---------------------------------------------------------"
390 '-------------------------------------------------------------------------
392 function GetXMLElementPath ( sMainElement$ , sUsedElement$ , sStyleName$, sStyleNameValue$ ) as string
393 'Author: Joerg Sievers
394 '///+ Returns the path (in a DOM tree) for an exact named element.
395 '///+ With this string it is possible to navigate easily to a named
396 '///+ element with <i>SAXSeekElement</i>-function.
397 Dim InputstreamXML as integer
399 Dim xAttributeValue as string
401 Dim xElementName as string
402 'Read the file and go to the main DOM node
405 InputstreamXML = SaxGetChildCount
406 ' If you need a debug mode, enable the printlog entries
407 ' -----------------------------------------------------
408 ' printlog " +-- function: GetXMLElementPath --------------------------"
409 ' printlog " | Main Node : " & SAXGetElementName
410 'Go to the main element (like office:body, office:script, office:automatic-styles, etc.)
411 SAXSeekElement(sMainElement$)
412 ' printlog " | Main Element : " & sMainElement$
413 InputstreamXML = SaxGetChildCount
414 ' printlog " | Count of Children : " & InputstreamXML
415 for i = 1 to InputstreamXML
417 xElementName = SAXGetElementName(i)
418 if xElementName = sUsedElement$ then
419 xAttributeValue = SAXGetAttributeValue(sStyleName$)
420 if xAttributeValue = sStyleNameValue$ then
421 ' printlog " | Elementname (" & i & ") : " & xElementName
422 ' printlog " | Attribute value : " & xAttributeValue
423 GetXMLElementPath = SAXGetElementPath
429 ' printlog " +---------------------------------------------------------"
432 '-------------------------------------------------------------------------
434 function fWhereIsXMLElementInBody ( sSubDocumentRootElement as string , sDocumentRootElement as string , sWhichElement as string , OPTIONAL A ) as string
435 Dim iXMLElements as integer
437 Dim iXMLElementsInSecondLayer as integer
439 '/// A function to parse a XML DOM of an office document and return the <i>Elementpath</i>
440 '///+ of an element where you can search for in the <u>second level</u>.
441 SAXSeekElement(sSubDocumentRootElement)
442 '/// Input:<ol><li>Which element to be searched for</li>
443 '///+ <li><i>SubDocumentRootElement</i>:
444 '///+ <ul><li>office:document-meta</li>
445 '///+ <li>office:document-styles</li>
446 '///+ <li>office:document-content</li>
447 '///+ <li>office:document-settings</li></ul></li>
448 SAXSeekElement(sDocumentRootElement)
449 '///+ <li><i>DocumentRootElement</i>:
450 '///+ <ul><li>office:meta</li>
451 '///+ <li>office:settings</li>
452 '///+ <li>office:scripts</li>
453 '///+ <li>office:font-decls</li>
454 '///+ <li>office:styles</li>
455 '///+ <li>office:automatic-styles</li>
456 '///+ <li>office:master-styles</li>
457 '///+ <li>office:body</li></ul></li></ol>
458 iXMLElements = SAXGetChildCount
459 'If you need debug information, change next line value to TRUE
460 EnableQAErrors = FALSE
461 QAErrorLog "Debug: t_xml2.inc::fWhereIsXMLElementInBody::iXMLElements = "& iXMLElements
462 for i = 1 to iXMLElements
463 if SAXHasElement(i) = TRUE then
465 QAErrorLog "Debug: t_xml2.inc::fWhereIsXMLElementInBody::SAXSeekelement(i) [" & i & "] = +- "& SAXGetElementname(i)
466 iXMLElementsInSecondLayer = SAXGetChildCount
467 QAErrorLog "Debug: t_xml2.inc::fWhereIsXMLElementInBody::iXMLElementsInSecondLayer [" & i & "] = +- "& iXMLElementsInSecondLayer
468 if iXMLElementsInSecondLayer <> 0 then
469 for k = 1 to iXMLElementsInSecondLayer
471 if SAXGetNodeType = NodeTypeElement then
472 QAErrorLog "Debug: t_xml2.inc::fWhereIsXMLElementInBody::SAXGetElementName(a) [" & k & "] = +- "& SAXGetElementName(k)
474 if SAXGetElementName(k) = sWhichElement then
475 fWhereIsXMLElementInBody = SAXGetElementPath
476 QAErrorLog "Debug: t_xml2.inc::fWhereIsXMLElementInBody =" & fWhereIsXMLElementInBody
480 if SAXGetElementName((k-1)+A) = sWhichElement then
482 SAXSeekElement(sWhichElement , A)
483 QAErrorLog "Debug: t_xml2.inc::fWhereIsXMLElementInBody::SAXGetChildCount =" & SAXGetChildCount
484 fWhereIsXMLElementInBody = SAXGetElementPath
485 QAErrorLog "Debug: t_xml2.inc::fWhereIsXMLElementInBody =" & fWhereIsXMLElementInBody
497 '-------------------------------------------------------------------------