5 from Ft
.Xml
.Domlette
import Node
, implementation
6 from Ft
.Xml
import XMLNS_NAMESPACE
7 from Ft
.Xml
.Lib
.Nss
import GetAllNs
9 from string
import find
, lower
, join
10 from socket
import gethostbyaddr
, gethostname
# Matches an entity-style reference "&...;" whose first character after the
# ampersand is not a digit and which contains no whitespace; group 1 is the
# text between "&" and ";".  Written as a raw string so the regex escapes
# \D and \S are not interpreted as (invalid) string escapes.
entrefpattern = re.compile(r'&(\D\S+);')
def node_to_xml(node):
    """Takes an XML node and returns an XML document suitable for saving.

    A new document is created with a placeholder 'root' element; a deep copy
    of node is imported into that document and swapped in for the
    placeholder, so the copy becomes the document element.

    Returns the newly created document.
    """
    root = implementation.createDocument(None, 'root', None)
    # Deep-copy first, then import the copy so it is owned by the new document.
    new = node.cloneNode(deep=1)
    new = root.importNode(new, deep=1)
    root.replaceChild(new, root.documentElement)
    # The docstring promises a result; hand back the document that was built.
    return root
def node_to_html(node):
    """Takes an XML node and returns an HTML document suitable for saving.

    A new HTML document is created and node is copied into it one node at a
    time; element attributes are re-created by local name and value only.
    The copy then replaces the new document's document element.

    Returns the newly created HTML document.
    """
    root = implementation.createHTMLDocument('HTML document')

    def html(doc, node, html):
        # The helper receives itself as its last argument so it can recurse.
        # Shallow copy only: attributes and children are rebuilt by hand below.
        new = doc.importNode(node.cloneNode(deep=0), deep=0)
        if node.nodeType == Node.ELEMENT_NODE:
            for a in node.attributes:
                new.setAttribute(a.localName, a.value)
            # NOTE(review): the flattened source does not show whether this
            # loop sat inside or outside the element check -- confirm.
            for k in node.childNodes:
                new.appendChild(html(doc, k, html))
        # The result is passed to appendChild() and used by the caller, so the
        # copy must be returned.
        return new

    new = html(root, node, html)
    root.replaceChild(new, root.documentElement)
    return root
# send_to_file(data, path): opens `path` for binary writing; the visible
# error path calls rox.report_exception().
# NOTE(review): listing lines 39, 41-45 and 47-50 are not visible in this
# view. Presumably they hold the write/close of `data`, the try/except that
# leads to rox.report_exception(), and the return value -- confirm against
# the full file before changing this function.
38 def send_to_file(data
, path
):
40 file = open(path
, 'wb')
46 rox
.report_exception()
# import_with_ns(doc, node): imports `node` into `doc` (second argument 1,
# i.e. a deep import) and sets attributes in XMLNS_NAMESPACE on the imported
# copy.
# NOTE(review): listing lines 52-53, 55-63 and 65-66 are not visible in this
# view. `ns` and `uri` are defined there -- presumably by iterating over the
# namespaces in scope (GetAllNs is imported by this file); the return value
# is also not visible. Confirm against the full file.
51 def import_with_ns(doc
, node
):
54 node
= doc
.importNode(node
, 1)
# Declare a namespace on the imported node as an xmlns attribute.
64 node
.setAttributeNS(XMLNS_NAMESPACE
, ns
, uri
)
def fix_broken_html(data):
    """Pre-parse the data before sending to tidy to fix really really broken
    stuff (eg, MS Word output).

    Returns None if data is OK (it contains no '<o:p>' tag). Otherwise
    returns a copy of data with empty '<o:p></o:p>' pairs and '<![...]>'
    blocks removed."""
    if '<o:p>' not in data:
        return None  # Doesn't need fixing?

    data = data.replace('<o:p></o:p>', '')
    # Strip '<![' ... ']>' blocks; raw string keeps the regex escapes literal.
    # Callers use "fixed or data", so the cleaned text must be returned.
    return re.sub(r'<!\[[^]]*\]>', '', data)
# to_html_doc(data): pre-cleans `data` with fix_broken_html(), pipes the
# cleaned text (or the original when no fix was needed) into the external
# `tidy` program with -asxml, and reads tidy's output back into `data`.
# NOTE(review): listing lines 78-79, 85-91, 93, 96-100 and 102-105 are not
# visible in this view. They define `r`, choose between the two tidy command
# lines below (with and without -utf8), and produce the return value --
# confirm against the full file.
77 def to_html_doc(data
):
80 #data = data.replace(' ', ' ')
81 #data = data.replace('©', '(c)')
82 #data = data.replace('ä', '(auml)')
83 #data = data.replace('ö', '(ouml)')
84 fixed
= fix_broken_html(data
)
# Variant that tells tidy the input is UTF-8.
92 tin
= os
.popen('tidy --force-output yes -q -utf8 -asxml 2>/dev/null', 'w')
# Variant without an explicit encoding option.
94 tin
= os
.popen('tidy --force-output yes -q -asxml 2>/dev/null', 'w')
# fix_broken_html() returns None when nothing needed fixing, hence the "or".
95 tin
.write(fixed
or data
)
101 data
= os
.fdopen(r
).read()
# parse_data(data, path): parses the string `data` with nonvalParse, passing
# `path` as the second argument to isrc.fromString() (presumably the
# document URI -- confirm).
# NOTE(review): listing lines 112-114, 117, 121 and 125-126 are not visible
# in this view; they hold the try/except structure and the return value.
# The printed messages indicate a first parse, a retry after rewriting
# entity references, and a final failure report with a traceback -- confirm
# the exact control flow against the full file.
106 def parse_data(data
, path
):
107 """Convert and XML document into a DOM Document."""
108 from Ft
.Xml
.InputSource
import InputSourceFactory
109 #from Ft.Xml.cDomlette import nonvalParse
110 from Ft
.Xml
.FtMiniDom
import nonvalParse
111 isrc
= InputSourceFactory()
115 print "Parsing (with entities)..."
116 doc
= nonvalParse(isrc
.fromString(data
, path
))
118 print "Parse failed.. retry without entities..."
# NOTE(review): as shown, '&\\1;' replaces each match with identical text,
# so this substitution changes nothing. The replacement probably escaped the
# ampersand originally and was entity-decoded in this copy -- confirm.
119 data
= entrefpattern
.sub('&\\1;',data
)
120 doc
= nonvalParse(isrc
.fromString(data
, path
))
# Report the exception currently being handled on stderr.
122 type, val
, tb
= sys
.exc_info()
123 traceback
.print_exception(type, val
, tb
)
124 print "parsing failed!"
127 #rox.report_exception()