5 from xml
.dom
import Node
6 from Ft
.Xml
.Domlette
import implementation
7 from Ft
.Xml
import XMLNS_NAMESPACE
8 from Ft
.Xml
.Lib
.Nss
import GetAllNs
10 from string
import find
, lower
, join
11 from socket
import gethostbyaddr
, gethostname
# Matches an entity-style reference such as "&name;": an ampersand, one
# non-digit character, one or more non-whitespace characters, then ";".
# Group 1 captures what was matched between the "&" and the closing ";".
# Written as a raw string so the \D and \S escapes reach the regex engine
# untouched.
entrefpattern = re.compile(r'&(\D\S+);')
def node_to_xml(node):
    """Takes an XML node and returns an XML document suitable for saving.

    A new document is created through the DOM ``implementation`` with a
    placeholder 'root' element. ``node`` is deep-cloned, the clone is
    imported into the new document, and it then replaces the placeholder
    document element.

    Returns the new document, whose document element is the copy of
    ``node``.
    """
    root = implementation.createDocument(None, 'root', None)
    # Clone first, then import the clone so the copy belongs to `root`.
    copy = root.importNode(node.cloneNode(1), 1)
    root.replaceChild(copy, root.documentElement)
    return root
def node_to_html(node):
    """Takes an XML node and returns an HTML document suitable for saving.

    A new HTML document titled 'HTML document' is created through the DOM
    ``implementation``. ``node`` is copied into it node by node (see the
    inner helper) and the copy replaces the document element.

    Returns the new HTML document.
    """
    root = implementation.createHTMLDocument('HTML document')

    def html(doc, node, html):
        # Copy `node` into `doc`. The helper receives itself as its third
        # argument and recurses through it -- presumably so that recursion
        # does not depend on nested-scope name lookup.
        new = doc.importNode(node.cloneNode(deep=0), deep=0)
        if node.nodeType == Node.ELEMENT_NODE:
            # Re-set every attribute under its local name.
            for a in node.attributes:
                new.setAttribute(a.localName, a.value)
            # Only elements have their children copied, recursively.
            for k in node.childNodes:
                new.appendChild(html(doc, k, html))
        # Hand the copy back so the caller can append it.
        return new

    new = html(root, node, html)
    root.replaceChild(new, root.documentElement)
    return root
# send_to_file(data, path): the statements shown open `path` for binary
# writing ('wb') and call rox.report_exception().
# NOTE(review): the embedded original line numbers jump 39 -> 41 -> 47, so
# statements are missing from this view -- presumably the write of `data`,
# the close of the file and the try/except that guards the
# report_exception() call. As shown, `data` is never used and the file is
# never closed. Restore the missing lines from the original file before
# relying on this function. TODO confirm the return value callers expect.
39 def send_to_file(data
, path
):
41 file = open(path
, 'wb')
47 rox
.report_exception()
def fix_broken_html(data):
    """Pre-parse the data before sending to tidy to fix really really broken
    stuff (eg, MS Word output).

    Returns the cleaned-up text, or None if data is OK (it contains no
    '<o:p>' tag and is therefore left alone)."""
    if data.find('<o:p>') == -1:
        return None    # Doesn't need fixing?

    # Drop empty <o:p></o:p> pairs.
    data = data.replace('<o:p></o:p>', '')
    # Strip "<![...]>" sections such as "<![if !supportLists]>" / "<![endif]>".
    data = re.sub(r'<!\[[^]]*\]>', '', data)
    # Return the result so a caller can use `fixed or data`.
    return data
# to_html_doc(data): per its docstring, runs `data` through the external
# "tidy" program and returns the resulting XML text.
# Steps visible here: fix_broken_html(data) is called first; a write pipe to
# a `tidy --force-output yes -q ... -asxml` command is opened with os.popen
# (two variants appear, with and without -utf8); `fixed or data` is written
# to it, i.e. the pre-fixed text when there is one, otherwise the input; the
# output is then read with os.fdopen(r).read().
# NOTE(review): `r` is never assigned in the lines shown, and the embedded
# original line numbers skip (70 -> 78, 78 -> 80, 81 -> 87). Statements are
# missing from this view -- presumably the pipe/process plumbing that
# produces `r`, the condition choosing between the two popen commands, and
# the final return. Restore them from the original file before relying on
# this function.
62 def to_html_doc(data
):
63 "Run data though tidy and return the resulting XML text"
66 #data = data.replace(' ', ' ')
67 #data = data.replace('©', '(c)')
68 #data = data.replace('ä', '(auml)')
69 #data = data.replace('ö', '(ouml)')
70 fixed
= fix_broken_html(data
)
78 tin
= os
.popen('tidy --force-output yes -q -utf8 -asxml 2>/dev/null', 'w')
80 tin
= os
.popen('tidy --force-output yes -q -asxml 2>/dev/null', 'w')
81 tin
.write(fixed
or data
)
87 data
= os
.fdopen(r
).read()
92 def parse_data(data
, path
):
93 """Convert and XML document into a DOM Document."""
94 from Ft
.Xml
.InputSource
import InputSourceFactory
95 #from Ft.Xml.cDomlette import nonvalParse
96 from Ft
.Xml
.FtMiniDom
import nonvalParse
97 isrc
= InputSourceFactory()
101 print "Parsing (with entities)..."
102 doc
= nonvalParse(isrc
.fromString(data
, path
))
104 print "Parse failed.. retry without entities..."
105 data
= entrefpattern
.sub('&\\1;',data
)
106 doc
= nonvalParse(isrc
.fromString(data
, path
))
108 type, val
, tb
= sys
.exc_info()
109 traceback
.print_exception(type, val
, tb
)
110 print "parsing failed!"
113 #rox.report_exception()