2 # -*- coding: utf-8 -*-
4 # Copyright 2004-2006 Zuza Software Foundation
6 # This file is part of translate.
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""convert HTML files to Gettext PO localization files

See: http://translate.sourceforge.net/wiki/toolkit/html2po for examples and
usage instructions
"""
29 from translate
.storage
import po
30 from translate
.storage
import html
def convertfile(self, inputfile, filename, includeheader, includeuntagged=False, duplicatestyle="msgid_comment"):
    """converts a html file to .po format

    :param inputfile: the HTML input, handed to ``html.htmlfile`` as ``inputfile``
    :param filename: name of the input; accepted for interface compatibility
        but not used by this method
    :param includeheader: when true, a PO header unit (charset UTF-8,
        encoding 8bit) is added before any translatable units
    :param includeuntagged: forwarded to the HTML parser as
        ``includeuntaggeddata``
    :param duplicatestyle: passed to ``removeduplicates`` on the resulting store
    :return: the populated ``po.pofile`` store
    """
    thetargetfile = po.pofile()
    htmlparser = html.htmlfile(includeuntaggeddata=includeuntagged, inputfile=inputfile)
    # Honour the caller's decision: the header is only wanted when the caller
    # asks for it (converthtml requests it only at output position 0).
    if includeheader:
        targetheader = thetargetfile.makeheader(charset="UTF-8", encoding="8bit")
        thetargetfile.addunit(targetheader)
    # One PO unit per parsed HTML unit, carrying over its source locations.
    for htmlunit in htmlparser.units:
        thepo = thetargetfile.addsourceunit(htmlunit.source)
        thepo.addlocations(htmlunit.getlocations())
    thetargetfile.removeduplicates(duplicatestyle)
    # The caller serialises the result with str(), so the store must be returned.
    return thetargetfile
def converthtml(inputfile, outputfile, templates, includeuntagged=False, pot=False, duplicatestyle="msgctxt"):
    """converts the HTML read from inputfile to PO and writes it to outputfile

    ``templates`` and ``pot`` are accepted but are not used in this function.
    The input's ``name`` attribute (or "unknown" when it has none) is passed
    to the convertor as the file name.
    """
    # A header is requested only when the output is still at position 0.
    wants_header = outputfile.tell() == 0
    source_name = getattr(inputfile, "name", "unknown")
    # NOTE(review): ``convertor`` is not defined in this section; presumably
    # it is set up elsewhere in the module - confirm.
    po_store = convertor.convertfile(
        inputfile,
        source_name,
        wants_header,
        includeuntagged,
        duplicatestyle=duplicatestyle,
    )
    outputfile.write(str(po_store))
from translate.convert import convert
from translate.misc import stdiotell
# sys is used immediately below; import it locally like the other helpers so
# this section does not rely on an import made somewhere else.
import sys

# Replace stdout with the stdiotell wrapper.
# NOTE(review): presumably this is what lets converthtml call
# outputfile.tell() when writing to stdout - confirm against stdiotell.
sys.stdout = stdiotell.StdIOWrapper(sys.stdout)

# Every accepted input extension (and None) maps to PO output via converthtml.
formats = {
    "html": ("po", converthtml),
    "htm": ("po", converthtml),
    "xhtml": ("po", converthtml),
    None: ("po", converthtml),
}
parser = convert.ConvertOptionParser(formats, usepots=True, description=__doc__)
parser.add_option("-u", "--untagged", dest="includeuntagged", default=False, action="store_true",
                  help="include untagged sections")
# Option names listed in passthrough match converthtml's keyword parameters.
parser.passthrough.append("includeuntagged")
parser.add_duplicates_option()
parser.passthrough.append("pot")
# NOTE(review): no call that actually runs the parser is visible in this
# section - confirm it is invoked elsewhere.
70 if __name__
== '__main__':