Fix order of exported HTML.
[dom-editor.git] / Dome / support.py
blob2da0f98765e26e1af92e4c6811cae8cb1d9b3dbe
1 import rox
3 import os, sys
4 import traceback
5 from Ft.Xml.Domlette import Node, implementation
6 from Ft.Xml import XMLNS_NAMESPACE
7 from Ft.Xml.Lib.Nss import GetAllNs
9 from string import find, lower, join
10 from socket import gethostbyaddr, gethostname
def node_to_xml(node):
    """Wrap a deep copy of node in a fresh document suitable for saving.

    A new document is created with a placeholder 'root' element; that
    placeholder is then replaced by a deep copy of node imported into
    the new document.  The document itself (not its documentElement)
    is returned.
    """
    doc = implementation.createDocument(None, 'root', None)
    # Clone first so the caller's node is never touched, then import the
    # clone so that it is owned by the new document.
    copy = doc.importNode(node.cloneNode(deep = 1), deep = 1)
    doc.replaceChild(copy, doc.documentElement)
    return doc
def node_to_html(node):
    """Wrap a copy of node in a fresh HTML document suitable for saving.

    The tree under node is copied node by node into a new HTML document.
    Each element's attributes are re-set on the copy using only their
    local names.  The copy replaces the new document's documentElement
    and the document itself is returned.
    """
    html_doc = implementation.createHTMLDocument('HTML document')

    def convert(doc, src):
        # Shallow copy only: children are handled by the recursion below
        # so that every element gets its attributes rewritten.
        copy = doc.importNode(src.cloneNode(deep = 0), deep = 0)
        if src.nodeType != Node.ELEMENT_NODE:
            return copy
        for attr in src.attributes:
            copy.setAttribute(attr.localName, attr.value)
        # NOTE(review): the indentation of this file was lost; the child
        # loop is assumed to apply to element nodes only -- confirm
        # against the original source.
        for child in src.childNodes:
            # appendChild keeps the children in document order.
            copy.appendChild(convert(doc, child))
        return copy

    converted = convert(html_doc, node)
    html_doc.replaceChild(converted, html_doc.documentElement)
    return html_doc
def send_to_file(data, path):
    """Write data to the file at path, replacing any existing contents.

    The file is opened in binary mode ('wb').  Any error while opening
    or writing is passed to rox.report_exception() instead of being
    propagated.

    Returns 1 on success and 0 if an error was reported.
    """
    try:
        stream = open(path, 'wb')
        try:
            stream.write(data)
        finally:
            # Always release the handle, even if the write fails.
            stream.close()
    except Exception:
        # Only ordinary errors are reported; KeyboardInterrupt and
        # SystemExit are allowed to propagate.
        rox.report_exception()
        return 0

    return 1
def import_with_ns(doc, node):
    """Import node into doc, adding namespace declarations to the copy.

    The mapping returned by GetAllNs(node) is read before the import.
    Each entry is treated as prefix -> URI and written onto the imported
    node as an 'xmlns' (prefix None) or 'xmlns:<prefix>' attribute.  The
    'xml' prefix and entries with neither a prefix nor a URI are skipped.

    Returns the imported node.
    """
    in_scope = GetAllNs(node)

    imported = doc.importNode(node, 1)
    for prefix, uri in in_scope.items():
        if prefix == 'xml':
            continue
        if not (prefix or uri):
            # Nothing to declare.
            continue
        if prefix is None:
            attr_name = 'xmlns'
        else:
            attr_name = 'xmlns:' + prefix
        imported.setAttributeNS(XMLNS_NAMESPACE, attr_name, uri)
    return imported
def fix_broken_html(data):
    """Pre-parse the data before sending to tidy to fix really really broken
    stuff (eg, MS Word output).

    If data contains no '<o:p>' tag it is considered OK and None is
    returned.  Otherwise a cleaned copy is returned in which empty
    '<o:p></o:p>' pairs and '<![...]>' sections (such as
    '<![if !supportLists]>' and '<![endif]>') have been removed.
    """
    if '<o:p>' not in data:
        return None     # Doesn't need fixing?
    import re
    data = data.replace('<o:p></o:p>', '')
    # Raw string: the backslashes are regex escapes for literal brackets.
    data = re.sub(r'<!\[[^]]*\]>', '', data)
    return data
def to_html_doc(data):
    """Run data through the external 'tidy' program and return its output.

    A child process is forked with its standard output redirected into
    a pipe.  The child feeds the data (pre-cleaned by fix_broken_html()
    when that returns a non-empty result) to 'tidy ... -asxml' and then
    exits; tidy's error output is discarded.  The parent reads
    everything written to the pipe, waits for the child, and returns
    the text read.
    """
    (r, w) = os.pipe()
    child = os.fork()
    if child == 0:
        # We are the child
        try:
            os.close(r)
            # Make the pipe our stdout so that tidy, which inherits it,
            # writes its result to the parent.
            os.dup2(w, 1)
            os.close(w)
            # Only the child needs the cleaned data, so compute it here
            # rather than in both processes.
            fixed = fix_broken_html(data)
            if fixed:
                # NOTE(review): -utf8 is only passed for pre-cleaned
                # input; the reason is not visible here -- confirm.
                tin = os.popen('tidy --force-output yes -q -utf8 -asxml 2>/dev/null', 'w')
            else:
                tin = os.popen('tidy --force-output yes -q -asxml 2>/dev/null', 'w')
            tin.write(fixed or data)
            tin.close()
        finally:
            # Never return into the caller's code from the child.
            os._exit(0)

    # Parent: close our copy of the write end so that read() sees EOF
    # once the child (and tidy) have finished writing.
    os.close(w)

    try:
        stream = os.fdopen(r)
        try:
            data = stream.read()
        finally:
            stream.close()
    finally:
        # Always reap the child, even if reading failed.
        os.waitpid(child, 0)

    return data
103 def parse_data(data, path):
104 """Convert and XML document into a DOM Document."""
105 from Ft.Xml.InputSource import InputSourceFactory
106 #from Ft.Xml.cDomlette import nonvalParse
107 from Ft.Xml.FtMiniDom import nonvalParse
108 isrc = InputSourceFactory()
110 try:
111 try:
112 print "Parsing (with entities)..."
113 doc = nonvalParse(isrc.fromString(data, path))
114 except:
115 print "Parse failed.. retry without entities..."
116 data = entrefpattern.sub('&amp;\\1;',data)
117 doc = nonvalParse(isrc.fromString(data, path))
118 except:
119 type, val, tb = sys.exc_info()
120 traceback.print_exception(type, val, tb)
121 print "parsing failed!"
122 print "Data was:"
123 print data
124 #rox.report_exception()
125 raise Beep
126 else:
127 print "parse OK...",
128 return doc