5 from Ft
.Xml
.Domlette
import Node
, implementation
6 from Ft
.Xml
import XMLNS_NAMESPACE
7 from Ft
.Xml
.Lib
.Nss
import GetAllNs
9 from string
import find
, lower
, join
10 from socket
import gethostbyaddr
, gethostname
def node_to_xml(node):
    """Takes an XML node and returns an XML document suitable for saving.

    A new document is created with a placeholder 'root' element, a deep
    clone of *node* is imported into it, and the clone replaces the
    placeholder as the document element.

    node -- the DOM node to copy; it is cloned rather than used directly.

    Returns the new document, whose documentElement is the copy of *node*.
    """
    # 'root' is only a placeholder element; it is replaced just below.
    doc = implementation.createDocument(None, 'root', None)
    copy = doc.importNode(node.cloneNode(deep=1), deep=1)
    doc.replaceChild(copy, doc.documentElement)
    return doc
def node_to_html(node):
    """Takes an XML node and returns an HTML document suitable for saving.

    The tree under *node* is copied node by node into a freshly created
    HTML document, and the copy replaces that document's original
    document element.

    node -- the DOM node at the top of the tree to copy.

    Returns the new HTML document.
    """
    root = implementation.createHTMLDocument('HTML document')

    def html(doc, node, html):
        # Copy 'node' into 'doc' and return the copy. The helper receives
        # itself as 'html' so it can recurse without relying on nested
        # scopes.
        new = doc.importNode(node.cloneNode(deep=0), deep=0)
        if node.nodeType == Node.ELEMENT_NODE:
            # Attributes are re-set by local name only, which drops any
            # namespace information they carried.
            for a in node.attributes:
                new.setAttribute(a.localName, a.value)
        for k in node.childNodes:
            new.appendChild(html(doc, k, html))
        # The callers append / install this copy, so it must be returned.
        return new

    new = html(root, node, html)
    root.replaceChild(new, root.documentElement)
    return root
# send_to_file(data, path): opens 'path' for binary writing ('wb').
# NOTE(review): only the open() call and a rox.report_exception() call are
# visible in this part of the file -- presumably 'data' is written in
# between and the report is the error path; confirm against the complete
# function.
35 def send_to_file(data
, path
):
37 file = open(path
, 'wb')
43 rox
.report_exception()
# import_with_ns(doc, node): imports 'node' into the document 'doc' (the
# second importNode argument is 1) and sets attributes in XMLNS_NAMESPACE
# on the imported copy via setAttributeNS(XMLNS_NAMESPACE, ns, uri).
# NOTE(review): where 'ns' and 'uri' come from is not visible here --
# presumably a loop over the result of GetAllNs(); confirm.
48 def import_with_ns(doc
, node
):
51 node
= doc
.importNode(node
, 1)
61 node
.setAttributeNS(XMLNS_NAMESPACE
, ns
, uri
)
def fix_broken_html(data):
    """Pre-parse the data before sending to tidy to fix really really broken
    stuff (eg, MS Word output).

    data -- the HTML text to inspect.

    Returns the repaired text, or None if data is OK (it contains no
    '<o:p>' marker and so needs no fixing).
    """
    # 're' is not among the imports visible in this part of the file, so it
    # is imported locally.
    import re

    if data.find('<o:p>') == -1:
        return None  # Doesn't need fixing?

    # Drop the empty '<o:p></o:p>' elements.
    data = data.replace('<o:p></o:p>', '')
    # Strip '<![ ... ]>' sections such as '<![if ...]>' and '<![endif]>'.
    # A raw string keeps the backslashes as regex escapes.
    return re.sub(r'<!\[[^]]*\]>', '', data)
# to_html_doc(data): runs 'data' through fix_broken_html(), writes the
# repaired text (or the original 'data' when the result is falsy) to the
# stdin of an external 'tidy' process started with -asxml, and reads
# text back from file descriptor 'r'.
# NOTE(review): the two os.popen() calls (with and without -utf8) are
# presumably alternative branches, and 'r' presumably is the read end of a
# pipe receiving tidy's output -- neither is visible here; confirm.
74 def to_html_doc(data
):
77 #data = data.replace(' ', ' ')
78 #data = data.replace('©', '(c)')
79 #data = data.replace('ä', '(auml)')
80 #data = data.replace('ö', '(ouml)')
81 fixed
= fix_broken_html(data
)
89 tin
= os
.popen('tidy --force-output yes -q -utf8 -asxml 2>/dev/null', 'w')
91 tin
= os
.popen('tidy --force-output yes -q -asxml 2>/dev/null', 'w')
92 tin
.write(fixed
or data
)
98 data
= os
.fdopen(r
).read()
# parse_data(data, path): parses the string 'data' with FtMiniDom's
# nonvalParse, using an InputSourceFactory input source built from
# (data, path). A second parse is attempted after rewriting 'data' with
# entrefpattern.sub('&\\1;', data); on failure the current exception is
# printed with traceback.print_exception().
# NOTE(review): the try/except structure linking these steps is not visible
# in this part of the file, and entrefpattern is defined elsewhere --
# presumably it matches entity references; confirm.
103 def parse_data(data
, path
):
104 """Convert and XML document into a DOM Document."""
105 from Ft
.Xml
.InputSource
import InputSourceFactory
106 #from Ft.Xml.cDomlette import nonvalParse
107 from Ft
.Xml
.FtMiniDom
import nonvalParse
108 isrc
= InputSourceFactory()
112 print "Parsing (with entities)..."
113 doc
= nonvalParse(isrc
.fromString(data
, path
))
115 print "Parse failed.. retry without entities..."
116 data
= entrefpattern
.sub('&\\1;',data
)
117 doc
= nonvalParse(isrc
.fromString(data
, path
))
119 type, val
, tb
= sys
.exc_info()
120 traceback
.print_exception(type, val
, tb
)
121 print "parsing failed!"
124 #rox.report_exception()