2 """Convert html files to ODF.
4 html2odt source.html destination.odt
7 from __future__
import with_statement
8 import sys
, os
, subprocess
, time
11 from com
.sun
.star
.beans
import PropertyValue
12 from com
.sun
.star
.connection
import NoConnectException
15 print >> sys
.stderr
, 'inspecting %r' % o
18 print >> sys
.stderr
, "%25s %s" % (a
, getattr(o
, a
))
20 print >> sys
.stderr
, "%s DOES NOT WORK! (%s)" % (a
, e
)
23 if path
.startswith('file:///'):
25 return "file://" + os
.path
.abspath(path
)
29 """Start up an open office and connect to it."""
30 accept_string
= "socket,host=localhost,port=2002;urp;StarOffice.ComponentContext"
32 self
.ooffice
= subprocess
.Popen(["ooffice", "-nologo", "-nodefault",
33 "-norestore", "-nofirststartwizard",
34 "-headless", "-invisible", "-nolockcheck",
35 "-accept=%s" % accept_string
])
40 local
= uno
.getComponentContext()
41 self
.resolver
= local
.ServiceManager
.createInstanceWithContext("com.sun.star.bridge.UnoUrlResolver", local
)
42 self
.context
= self
.resolver
.resolve("uno:" + accept_string
)
43 self
.desktop
= self
.unobject("com.sun.star.frame.Desktop", self
.context
)
46 except NoConnectException
:
47 print >> sys
.stderr
, '.',
def unobject(self, klass, context=None):
    """Return an instance of the service named by <klass>.

    <klass> will probably be a string that looks like
    'com.sun.something.SomeThing'.  The instance is obtained from
    self.context.ServiceManager: with no <context> the plain
    createInstance(klass) call is used, otherwise <context> is passed
    along to createInstanceWithContext(klass, context)."""
    manager = self.context.ServiceManager
    if context is None:
        return manager.createInstance(klass)
    # Only reached when the caller supplied a context; without the guard
    # above this call could never run and <context> would be ignored.
    return manager.createInstanceWithContext(klass, context)
58 """Attempt to load as TextDocument format, but fall back to
59 WebDocument if that doesn't work. (WebDocument is called
60 writer/web in the gui and is less exportable."""
61 # import property values:
62 # http://api.openoffice.org/docs/common/ref/com/sun/star/document/MediaDescriptor.html
64 return self
.desktop
.loadComponentFromURL(src
, "_blank", 0,
65 (PropertyValue("Hidden" , 0 , True, 0),
66 PropertyValue("FilterName" , 0 , 'HTML (StarWriter)', 0),
69 print >> sys
.stderr
, e
70 #fall back on default WebDocument format
71 return self
.desktop
.loadComponentFromURL(src
, "_blank", 0,
72 (PropertyValue("Hidden" , 0 , True, 0),
def embed_graphics(self, doc):
    """Swap every graphic object in <doc> for an embedded copy of itself.

    For each entry of doc.GraphicObjects, the entry's current GraphicURL
    is handed to a com.sun.star.graphic.GraphicProvider, and whatever
    queryGraphic returns is stored back on the entry as its "Graphic"
    property."""
    provider = self.unobject("com.sun.star.graphic.GraphicProvider")
    total = doc.GraphicObjects.Count
    for index in range(total):
        graphic = doc.GraphicObjects.getByIndex(index)
        # queryGraphic takes a sequence of properties; only the URL is given.
        url_property = PropertyValue("URL", 0, graphic.GraphicURL, 0)
        embedded = provider.queryGraphic((url_property,))
        graphic.setPropertyValue("Graphic", embedded)
83 def convert(self
, src
, dest
):
84 """Use the connected open office instance to convert the file
85 named by <src> into odf and save it as <dest>.
87 The main trick here is forcing the images to be stored inline."""
90 print >> sys
.stderr
, src
91 print >> sys
.stderr
, dest
94 self
.embed_graphics(doc
)
95 doc
.storeToURL(dest
, (PropertyValue("FilterName", 0, 'writer8', 0),
96 PropertyValue("Overwrite", 0, True, 0 )))
102 def __exit__(self
, exc_type
, exc_value
, traceback
):
103 self
.desktop
.dispose()
104 self
.context
.dispose()
106 os
.kill(self
.ooffice
.pid
, 15)
108 if self
.ooffice
.poll():
110 print >> sys
.stderr
, '*',
112 os
.kill(self
.ooffice
.pid
, 9)
115 def set_env(workdir
):
116 workdir
= os
.path
.abspath(workdir
)
117 os
.environ
['HOME'] = workdir
119 print >> sys
.stderr
, os
.environ
121 if __name__
== '__main__':
122 workdir
, src
, dest
= sys
.argv
[1:4]
126 oo
.convert(src
, dest
)