1 # -*- tab-width: 4; indent-tabs-mode: nil; py-indent-offset: 4 -*-
3 # This file is part of the LibreOffice project.
5 # This Source Code Form is subject to the terms of the Mozilla Public
6 # License, v. 2.0. If a copy of the MPL was not distributed with this
7 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 # This file incorporates work covered by the following license notice:
11 # Licensed to the Apache Software Foundation (ASF) under one or more
12 # contributor license agreements. See the NOTICE file distributed
13 # with this work for additional information regarding copyright
14 # ownership. The ASF licenses this file to you under the Apache
15 # License, Version 2.0 (the "License"); you may not use this file
16 # except in compliance with the License. You may obtain a copy of
17 # the License at http://www.apache.org/licenses/LICENSE-2.0 .
22 from unohelper
import Base
,systemPathToFileUrl
, absolutize
25 from com
.sun
.star
.beans
import PropertyValue
26 from com
.sun
.star
.beans
.PropertyState
import DIRECT_VALUE
27 from com
.sun
.star
.uno
import Exception as UnoException
28 from com
.sun
.star
.io
import IOException
,XInputStream
, XOutputStream
# Output-stream helper class: derives from Base and XOutputStream (both
# imported at the top of the file).
# NOTE(review): this extract has line-number residue fused into the text and
# is missing lines; the comments below describe only what is visible here.
30 class OutputStream(Base
, XOutputStream
):
# closeOutput takes no argument besides self. Its body is not visible in this
# extract, so its effect cannot be documented from here.
34 def closeOutput(self
):
# writeBytes passes the `value` attribute of the received `seq` object to
# sys.stdout.write, i.e. the converted document ends up on standard output.
# NOTE(review): on Python 3 the text-mode sys.stdout.write() accepts only str;
# if seq.value is a bytes object this call would raise TypeError -- confirm
# the runtime type and consider sys.stdout.buffer.write().
37 def writeBytes(self
, seq
):
38 sys
.stdout
.write(seq
.value
)
# Command-line handling. getopt parses everything after the program name:
# short options "-h" and "-c <arg>", long options "--help",
# "--connection-string=<arg>" and "--html". `args` receives the remaining
# positional arguments.
48 opts
, args
= getopt
.getopt(sys
.argv
[1:], "hc:", ["help", "connection-string=", "html"])
# Defaults used when no option overrides them: a UNO URL pointing at a socket
# on localhost port 2002, and the "Text (Encoded)" export filter.
50 url
= "uno:socket,host=localhost,port=2002;urp;StarOffice.ComponentContext"
51 filterName
= "Text (Encoded)"
# NOTE(review): `o` and `a` are presumably bound by a loop over `opts`; the
# loop header is not visible in this extract. The body of the help branch is
# not visible either.
53 if o
in ("-h", "--help"):
# A user-supplied connection string is wrapped into a full UNO URL by adding
# the "uno:" prefix and the ";urp;StarOffice.ComponentContext" suffix.
56 if o
in ("-c", "--connection-string"):
57 url
= "uno:" + a
+ ";urp;StarOffice.ComponentContext"
# Switches the export filter to the Writer HTML filter.
# NOTE(review): the guarding condition is not visible here; presumably this
# runs only when "--html" was given -- confirm against the full file.
59 filterName
= "HTML (StarWriter)"
# Connection setup: start from the component context of this Python process
# and take its service manager.
66 ctxLocal
= uno
.getComponentContext()
67 smgrLocal
= ctxLocal
.ServiceManager
# Create a UnoUrlResolver through the local service manager and resolve `url`
# with it; the resolved object is used as the context (`ctx`) for everything
# that follows.
69 resolver
= smgrLocal
.createInstanceWithContext(
70 "com.sun.star.bridge.UnoUrlResolver", ctxLocal
)
71 ctx
= resolver
.resolve(url
)
72 smgr
= ctx
.ServiceManager
# The Desktop service is created from the resolved context's service manager;
# it is the object later used to load documents.
74 desktop
= smgr
.createInstanceWithContext("com.sun.star.frame.Desktop", ctx
)
# Current working directory converted to a file URL; used later as the base
# for the input paths.
76 cwd
= systemPathToFileUrl(getcwd())
# Two PropertyValue entries closing a parenthesised sequence: the export
# filter name and an OutputStream instance that receives the exported data.
# NOTE(review): the opening of this expression is not visible in this extract;
# presumably it is the assignment to `outProps`, which is passed to
# storeToURL below -- confirm against the full file.
78 PropertyValue("FilterName" , 0, filterName
, 0),
79 PropertyValue("OutputStream", 0, OutputStream(), 0))
# The trailing comma makes inProps a one-element tuple holding the "Hidden"
# property set to True.
80 inProps
= PropertyValue("Hidden", 0 , True, 0),
# Turn `path` into a file URL and combine it with the cwd URL via
# uno.absolutize.
# NOTE(review): the binding of `path` (presumably a loop over `args`) and the
# enclosing `try:` are not visible in this extract.
83 fileUrl
= uno
.absolutize(cwd
, systemPathToFileUrl(path
))
# Load the document into a new "_blank" frame using the inProps load arguments.
84 doc
= desktop
.loadComponentFromURL(fileUrl
, "_blank", 0, inProps
)
# NOTE(review): the condition guarding this raise is not visible; presumably
# it fires when loadComponentFromURL returned no document -- confirm.
87 raise UnoException("Could not open stream for unknown reason", None)
# Export the loaded document to the "private:stream" URL with outProps.
89 doc
.storeToURL("private:stream", outProps
)
# I/O failures are reported on stderr with the exception's Message.
90 except IOException
as e
:
91 sys
.stderr
.write("Error during conversion: " + e
.Message
+ "\n")
# Any other UNO exception is reported on stderr together with the repr of the
# exception class.
93 except UnoException
as e
:
94 sys
.stderr
.write("Error (" + repr(e
.__class
__) + ") during conversion: " + e
.Message
+ "\n")
# Handlers whose message text lacks the "during conversion" wording used by
# the handlers above.
# NOTE(review): the matching `try:` and any statements following each write
# are not visible in this extract.
# A UNO exception is reported on stderr with the repr of its class and its
# Message.
99 except UnoException
as e
:
100 sys
.stderr
.write("Error (" + repr(e
.__class
__) + "): " + e
.Message
+ "\n")
# An invalid command line (getopt.GetoptError) is reported on stderr using the
# exception's string form.
102 except getopt
.GetoptError
as e
:
103 sys
.stderr
.write(str(e
) + "\n")
# Writes the usage/help text to stderr as a single concatenated string.
# NOTE(review): the enclosing `def`, several string fragments and the closing
# parenthesis of this call are not visible in this extract.
# NOTE(review): the user-facing text has wording defects to fix in a separate
# code change: the "[-c <connection-string> | ..." group is never closed with
# "]"; "access the file with" / "by the same system path" reads "with by";
# "to where the" / "the script" repeats "the"; "The strings defaults" should
# be "The string defaults".
110 sys
.stderr
.write("usage: ooextract.py --help |\n"+
111 " [-c <connection-string> | --connection-string=<connection-string>\n"+
112 " file1 file2 ...\n"+
114 "Extracts plain text from documents and prints it to stdout.\n" +
115 "Requires an OpenOffice.org instance to be running. The script and the\n"+
116 "running OpenOffice.org instance must be able to access the file with\n"+
117 "by the same system path.\n"
119 "-c <connection-string> | --connection-string=<connection-string>\n" +
120 " The connection-string part of a UNO URL to where the\n" +
121 " the script should connect to in order to do the conversion.\n" +
122 " The strings defaults to socket,host=localhost,port=2002\n"
124 " Instead of the text filter, the writer html filter is used\n"
129 # vim: set shiftwidth=4 softtabstop=4 expandtab: