Fixed version number (for 0release)
[dom-editor.git] / Dome / support.py
blobfaafc10227305dfed4eb362012d76b480796af13
1 import rox
3 import os, sys
4 import traceback
5 from xml.dom import Node
6 from Ft.Xml.Domlette import implementation
7 from Ft.Xml import XMLNS_NAMESPACE
9 from string import find, lower, join
10 from socket import gethostbyaddr, gethostname
12 import re
# Matches an entity-style reference: '&', one non-digit character, one or
# more non-whitespace characters, then ';'.  Group 1 is the text between
# the '&' and the final ';'.
entrefpattern = re.compile(r'&(\D\S+);')
def node_to_xml(node):
    """Return a new XML document holding a deep copy of node.

    A scratch document with a placeholder 'root' element is created; the
    placeholder is then replaced by the imported deep clone of node, and
    the document itself is returned.
    """
    doc = implementation.createDocument(None, 'root', None)
    # Clone first so the caller's node is not touched, then adopt the copy.
    copy = doc.importNode(node.cloneNode(1), 1)
    doc.replaceChild(copy, doc.documentElement)
    return doc
def node_to_html(node):
    """Return a new HTML document holding a copy of node.

    The tree is rebuilt one node at a time: each node is shallow-cloned and
    imported into the new HTML document.  For element nodes, every attribute
    is re-set under its local name and the children are copied recursively.
    The copy then replaces the new document's original document element.
    """
    doc = implementation.createHTMLDocument('HTML document')

    def copy_node(source):
        # Shallow copy only; attributes and children are handled below.
        copy = doc.importNode(source.cloneNode(deep = 0), deep = 0)
        if source.nodeType != Node.ELEMENT_NODE:
            return copy
        for attr in source.attributes:
            copy.setAttribute(attr.localName, attr.value)
        for child in source.childNodes:
            copy.appendChild(copy_node(child))
        return copy

    converted = copy_node(node)
    doc.replaceChild(converted, doc.documentElement)
    return doc
def send_to_file(data, path):
    """Write data to the file at path, reporting any failure to the user.

    The file is opened in binary write mode ('wb') and is always closed
    again once it has been opened.  Errors are not raised to the caller;
    they are passed to rox.report_exception() instead.

    Returns 1 on success, or 0 if opening or writing the file failed.
    """
    try:
        stream = open(path, 'wb')
        try:
            stream.write(data)
        finally:
            stream.close()
    except Exception:
        # Catch Exception rather than everything, so that KeyboardInterrupt
        # and SystemExit (Python 2.5 and later) are not swallowed here.
        rox.report_exception()
        return 0

    return 1
def fix_broken_html(data):
    """Pre-parse the data before sending to tidy to fix really really broken
    stuff (eg, MS Word output).

    Returns None if data contains no '<o:p>' tag and so is assumed to be
    OK.  Otherwise returns a copy of data with every empty '<o:p></o:p>'
    pair and every '<![...]>' section marker removed."""
    if '<o:p>' not in data:
        return None     # Doesn't need fixing?
    without_empty_tags = data.replace('<o:p></o:p>', '')
    return re.sub(r'<!\[[^]]*\]>', '', without_empty_tags)
def to_html_doc(data):
    """Run data through tidy and return the resulting XML text.

    data is first passed through fix_broken_html().  A child process is then
    forked whose standard output is the write end of a pipe; the child feeds
    the (possibly fixed) data to the external 'tidy' command via os.popen
    and exits.  The parent reads everything tidy wrote from the read end of
    the pipe and returns it.

    tidy's stderr is discarded and the child always exits with status 0, so
    a failing tidy run is not detected here; the caller simply gets whatever
    output was produced (possibly an empty string).
    """
    # Do the pre-processing BEFORE forking.  If it raised inside the child
    # (outside the try/finally below), the exception would propagate up the
    # child's copy of the call stack and leave a duplicate of the whole
    # application running.
    fixed = fix_broken_html(data)
    if fixed:
        # NOTE(review): -utf8 is only passed when the data needed fixing;
        # the reason is not recorded in this file.
        command = 'tidy --force-output yes -q -utf8 -asxml 2>/dev/null'
    else:
        command = 'tidy --force-output yes -q -asxml 2>/dev/null'

    (r, w) = os.pipe()
    try:
        child = os.fork()
    except OSError:
        # Don't leak the pipe if we couldn't create the child.
        os.close(r)
        os.close(w)
        raise

    if child == 0:
        # We are the child
        try:
            os.close(r)
            # Make the pipe our stdout so tidy's output goes to the parent.
            os.dup2(w, 1)
            os.close(w)
            tin = os.popen(command, 'w')
            tin.write(fixed or data)
            tin.close()
        finally:
            # Never return into the parent's code from the child.
            os._exit(0)

    # We are the parent.  Close our copy of the write end so that read()
    # sees end-of-file once the child has finished.
    os.close(w)
    try:
        reader = os.fdopen(r)
        try:
            result = reader.read()
        finally:
            reader.close()
    finally:
        # Always reap the child, even if reading failed.
        os.waitpid(child, 0)

    return result
def parse_data(data, path):
    """Convert an XML document into a DOM Document.

    data is the XML text; path is handed to the input source factory along
    with it.  If the first parse attempt fails, every match of entrefpattern
    in data has its leading '&' escaped as '&amp;' and the parse is retried
    once.  If that also fails, the traceback and the (escaped) data are
    printed and the exception is re-raised.
    """
    from Ft.Xml.InputSource import InputSourceFactory
    from Ft.Xml.FtMiniDom import NonvalParse

    factory = InputSourceFactory()

    try:
        try:
            doc = NonvalParse(factory.fromString(data, path))
        except:
            print("Parse failed.. retry without entities...")
            data = entrefpattern.sub('&amp;\\1;', data)
            doc = NonvalParse(factory.fromString(data, path))
    except:
        # Dump as much context as possible, then let the caller handle it.
        traceback.print_exc()
        print("parsing failed!")
        print("Data was:")
        print(data)
        raise
    return doc