for git v1.5.2 (and below): chdir to the directory of the target file before executin...
[translate_toolkit.git] / convert / html2po.py
blob0423a442dc7bcf11fb4ae2e3c71684b1cdfca9da
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
4 # Copyright 2004-2006 Zuza Software Foundation
5 #
6 # This file is part of translate.
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 """convert HTML files to Gettext PO localization files
25 See: http://translate.sourceforge.net/wiki/toolkit/html2po for examples and
26 usage instructions
27 """
29 from translate.storage import po
30 from translate.storage import html
class html2po:
    """Converter that turns the units parsed from an HTML file into a PO store."""

    def convertfile(self, inputfile, filename, includeheader, includeuntagged=False, duplicatestyle="msgid_comment"):
        """Build a PO store from the units found in an HTML input file.

        inputfile is handed to html.htmlfile for parsing.
        filename is accepted as part of the interface but is not used here.
        includeheader, when true, makes a UTF-8/8bit header the first unit.
        includeuntagged is forwarded to the parser as includeuntaggeddata.
        duplicatestyle is passed to removeduplicates on the resulting store.

        Returns the populated po.pofile.
        """
        postore = po.pofile()
        parsed = html.htmlfile(includeuntaggeddata=includeuntagged, inputfile=inputfile)
        if includeheader:
            postore.addunit(postore.makeheader(charset="UTF-8", encoding="8bit"))
        for unit in parsed.units:
            # One PO unit per HTML unit, carrying over the unit's locations.
            postore.addsourceunit(unit.source).addlocations(unit.getlocations())
        postore.removeduplicates(duplicatestyle)
        return postore
def converthtml(inputfile, outputfile, templates, includeuntagged=False, pot=False, duplicatestyle="msgctxt"):
    """Convert the HTML in inputfile to PO and write the result to outputfile.

    A PO header is requested only when outputfile.tell() reports position 0,
    i.e. nothing has been written to the output yet.  The templates and pot
    arguments are accepted but not used by this function.

    Always returns 1.
    """
    # Decide on the header before anything is written to the output.
    atstart = outputfile.tell() == 0
    # Inputs without a name attribute are reported as "unknown".
    sourcename = getattr(inputfile, "name", "unknown")
    postore = html2po().convertfile(inputfile, sourcename, atstart, includeuntagged, duplicatestyle=duplicatestyle)
    outputfile.write(str(postore))
    return 1
def main(argv=None):
    """Command-line entry point: set up the option parser and run it on argv.

    sys.stdout is replaced by a stdiotell.StdIOWrapper around it before the
    parser runs (presumably so the converter can call tell() on standard
    output -- confirm against stdiotell).
    """
    import sys
    from translate.convert import convert
    from translate.misc import stdiotell
    sys.stdout = stdiotell.StdIOWrapper(sys.stdout)
    # Every recognised input extension (and the None entry) maps to PO output
    # produced by converthtml.
    formats = dict.fromkeys(["html", "htm", "xhtml", None], ("po", converthtml))
    parser = convert.ConvertOptionParser(formats, usepots=True, description=__doc__)
    parser.add_option(
        "-u", "--untagged",
        dest="includeuntagged",
        default=False,
        action="store_true",
        help="include untagged sections",
    )
    # Options named in passthrough are handed on to the convert function.
    parser.passthrough.append("includeuntagged")
    parser.add_duplicates_option()
    parser.passthrough.append("pot")
    parser.run(argv)
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()