Update namespace with leave/enter.
[dom-editor.git] / Dome / support.py
blobeb5d83b27c8f5260b2aac4b76447b8d41f79a6c1
1 import rox
3 import os, sys
4 import traceback
5 from xml.dom import Node
6 from Ft.Xml.Domlette import implementation
7 from Ft.Xml import XMLNS_NAMESPACE
8 from Ft.Xml.Lib.Nss import GetAllNs
10 from string import find, lower, join
11 from socket import gethostbyaddr, gethostname
13 import re
# Matches "&...;" where the character right after "&" is a non-digit and is
# followed by one or more non-whitespace characters; group 1 is the text
# between "&" and ";".  parse_data() uses it to escape such references
# before retrying a failed parse.
entrefpattern = re.compile(r'&(\D\S+);')
def node_to_xml(node):
    """Wrap a copy of an XML node in a new document suitable for saving.

    A fresh document with a placeholder 'root' element is created, a deep
    clone of node is imported into it, and the clone then replaces the
    placeholder.  Only the clone is touched; the new document is returned.
    """
    document = implementation.createDocument(None, 'root', None)
    copy = document.importNode(node.cloneNode(1), 1)
    placeholder = document.documentElement
    document.replaceChild(copy, placeholder)
    return document
def node_to_html(node):
    """Copy an XML node into a new HTML document suitable for saving.

    The tree under node is rebuilt inside a document made by
    createHTMLDocument(): each node is shallow-cloned and imported, and
    element attributes are re-set under their local names.  The copy
    replaces the new document's documentElement; the document is returned.
    """
    document = implementation.createHTMLDocument('HTML document')

    def convert(source):
        # Shallow copy only: attributes and children are rebuilt by hand.
        copy = document.importNode(source.cloneNode(deep = 0), deep = 0)
        if source.nodeType == Node.ELEMENT_NODE:
            for attr in source.attributes:
                copy.setAttribute(attr.localName, attr.value)
            # NOTE(review): the child loop is assumed to belong inside the
            # element check -- confirm against the original indentation.
            for child in source.childNodes:
                copy.appendChild(convert(child))
        return copy

    document.replaceChild(convert(node), document.documentElement)
    return document
def send_to_file(data, path):
    """Write data to the file at path; return 1 on success, 0 on failure.

    The file is opened in binary mode and, once opened, is always closed.
    Any exception raised while opening or writing is handed to
    rox.report_exception() instead of propagating to the caller.
    """
    saved = 1
    try:
        stream = open(path, 'wb')
        try:
            stream.write(data)
        finally:
            stream.close()
    except:
        # Deliberately broad: every failure is reported, none is raised.
        rox.report_exception()
        saved = 0
    return saved
def fix_broken_html(data):
    """Clean up really broken HTML (eg, MS Word output) before tidy sees it.

    Returns None when data contains no '<o:p>' marker and so is left
    alone.  Otherwise returns a copy of data with every empty
    '<o:p></o:p>' pair and every '<![...]>' section removed.
    """
    import re
    if data.find('<o:p>') < 0:
        return None     # Doesn't need fixing
    without_empty_tags = data.replace('<o:p></o:p>', '')
    return re.sub(r'<!\[[^]]*\]>', '', without_empty_tags)
def to_html_doc(data):
    """Run data through tidy and return the resulting XML text.

    data is first passed to fix_broken_html().  A child process is then
    forked with its standard output redirected into a pipe; the child feeds
    the (possibly fixed) data to the external 'tidy' command and exits.
    The parent reads everything tidy wrote to the pipe, waits for the child
    to finish, and returns the text it read.

    When fix_broken_html() returned a non-empty result, that result is what
    tidy receives and tidy is run with the extra '-utf8' option.
    """
    # Pre-process before forking: the work is done only once, and a failure
    # here can no longer leave a stray child running the caller's code.
    fixed = fix_broken_html(data)

    (r, w) = os.pipe()
    child = os.fork()
    if child == 0:
        # We are the child
        try:
            os.close(r)
            os.dup2(w, 1)       # tidy inherits the pipe as its stdout
            os.close(w)
            if fixed:
                tin = os.popen('tidy --force-output yes -q -utf8 -asxml 2>/dev/null', 'w')
            else:
                tin = os.popen('tidy --force-output yes -q -asxml 2>/dev/null', 'w')
            tin.write(fixed or data)
            tin.close()
        finally:
            # Never return into the parent's code from the child.
            os._exit(0)

    # We are the parent: close our copy of the write end so that read()
    # sees end-of-file once the child is done.
    os.close(w)

    try:
        stream = os.fdopen(r)
        try:
            output = stream.read()
        finally:
            stream.close()
    finally:
        # Always reap the child, even if reading failed.
        os.waitpid(child, 0)

    return output
def parse_data(data, path):
    """Convert an XML document into a DOM Document.

    data is handed, together with path, to the input source factory's
    fromString() and the result is parsed with nonvalParse.  If that first
    attempt fails for any reason, every match of entrefpattern in data is
    rewritten as '&amp;...;' and the parse is tried once more.  If the
    retry fails too, the traceback and the rewritten data are printed and
    the exception is re-raised.

    Returns the document produced by nonvalParse.
    """
    from Ft.Xml.InputSource import InputSourceFactory
    #from Ft.Xml.cDomlette import nonvalParse
    from Ft.Xml.FtMiniDom import nonvalParse
    source_factory = InputSourceFactory()

    try:
        try:
            print("Parsing (with entities)...")
            doc = nonvalParse(source_factory.fromString(data, path))
        except:
            print("Parse failed.. retry without entities...")
            # Keep the rewritten text in data so a second failure prints it.
            data = entrefpattern.sub('&amp;\\1;', data)
            doc = nonvalParse(source_factory.fromString(data, path))
    except:
        traceback.print_exc()
        print("parsing failed!")
        print("Data was:")
        print(data)
        #rox.report_exception()
        raise
    else:
        # Python 2 print statement: the trailing comma suppresses the newline.
        print("parse OK..."),
    return doc