fix git support for v1.5.3 (or higher) by setting "--work-tree"
[translate_toolkit.git] / storage / qm.py
blobe15112e6f83dd4733c91c6cdb51d10ea220aeac6
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
4 # Copyright 2007 Zuza Software Foundation
6 # This file is part of translate.
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 """Module for parsing Qt .qm files
25 @note: based on documentation from Gettext's .qm implementation (see write-qt.c) and on observation
26 of the output of lrelease.
27 @note: Certain deprecated section tags are not implemented. These will break and print out
28 the missing tag. They are easy to implement and should follow the structure in 03
29 (Translation). We could find no examples that use these so we'd rather leave it
30 unimplemented until we actually have test data.
31 @note: Many .qm files are unable to be parsed as they do not have the source text. We assume
32 that since they use a hash table to lookup the data there is actually no need for the
33 source text. It seems however that in Qt4's lrelease all data is included in the resultant .qm
34 file.
35 @todo: We can only parse, not create, a .qm file. The main issue is that we need to
36 implement the hashing algorithm (which seems to be identical to the Gettext hash algorithm). Unlike
37 Gettext it seems that the hash is required, but that has not been validated.
38 @todo: The code can parse files correctly. But it could be cleaned up to be more readable, especially
39 the part that breaks the file into sections.
40 """
42 from translate.storage import base
43 from translate.misc.multistring import multistring
44 import codecs
45 import struct
46 import sys
# The 16-byte signature that opens every .qm file, expressed as the four
# big-endian unsigned 32-bit words that struct.unpack(">4L", ...) yields.
# Plain integer literals are used (no "L" suffix): Python promotes them to
# long automatically where needed, and the suffix is rejected by Python 3.
QM_MAGIC_NUMBER = (0x3CB86418, 0xCAEF9C95, 0xCD211CBF, 0x60A1BDDD)
def qmunpack(qmfile='messages.mo'):
    """Helper to dump a Qt .qm file as the body of a Python string literal.

    Every byte of the file is printed to standard output as a two-digit
    hexadecimal escape ("\\xNN"), all on a single line. Nothing is returned.

    @param qmfile: path of the file to dump
    """
    # .qm files are binary: open in binary mode so that no newline
    # translation or text decoding can alter the bytes being dumped.
    f = open(qmfile, 'rb')
    try:
        s = f.read()
    finally:
        # Release the handle even when the read fails.
        f.close()
    # struct gives the integer value of each byte for both str (Python 2)
    # and bytes (Python 3) input, which map(ord, ...) cannot do.
    print("\\x%02x" * len(s) % struct.unpack("%dB" % len(s), s))
class qmunit(base.TranslationUnit):
    """One translation message (source text and its target) from a .qm file."""

    def __init__(self, source=None):
        """Initialise the unit by passing C{source} on to the parent class.

        @param source: value forwarded unchanged to the parent initialiser
        """
        super(qmunit, self).__init__(source)
class qmfile(base.TranslationStore):
    """A class representing a .qm file.

    Only reading is implemented: L{parse} fills the store with units, while
    L{__str__} always produces an empty string.
    """
    UnitClass = qmunit

    def __init__(self, inputfile=None, unitclass=qmunit):
        """Create the store and optionally load C{inputfile}.

        @param inputfile: when not C{None}, handed to C{parsestring}
        @param unitclass: class used for the units held by this store
        """
        self.UnitClass = unitclass
        base.TranslationStore.__init__(self, unitclass=unitclass)
        self.units = []
        self.filename = ''
        if inputfile is not None:
            # NOTE(review): parsestring is inherited from
            # base.TranslationStore and is not visible here - confirm that it
            # populates this instance rather than building a separate store.
            self.parsestring(inputfile)

    def __str__(self):
        """Output a string representation of the .qm data file.

        Writing .qm data is not implemented, so this is always empty.
        """
        return ""

    def parse(self, input):
        """Parse the given file object or string of raw .qm data.

        A file-like object (anything with a C{read} method) is read
        completely and then closed. The data is checked for the .qm magic
        number, the top-level sections are walked to find the messages
        section, and one unit is added per message found in it. Data without
        a messages section leaves the store without new units.

        @param input: raw .qm data, or a file-like object yielding it
        @raise ValueError: if the data does not start with the .qm magic
            number, if a hash, messages or contexts section extends past the
            end of the data, or if a message has no source text or no
            translation
        @raise struct.error: if a section header or a message record is cut
            short
        """
        if hasattr(input, 'name'):
            self.filename = input.name
        elif not getattr(self, 'filename', ''):
            self.filename = ''
        if hasattr(input, "read"):
            qmsrc = input.read()
            input.close()
            input = qmsrc

        # Data too short to hold the signature is rejected in the same way as
        # data carrying the wrong signature, instead of failing in struct.
        if len(input) < 16 or struct.unpack(">4L", input[:16]) != QM_MAGIC_NUMBER:
            raise ValueError("This is not a .qm file")

        # Each top-level section is a 1-byte type followed by a 4-byte
        # big-endian payload length and then the payload itself.
        sectionheader = 5
        startsection = 16
        messages_start = messages_end = None
        while startsection < len(input):
            section_type, length = struct.unpack(
                ">bL", input[startsection:startsection + sectionheader])
            payload_start = startsection + sectionheader
            payload_end = payload_start + length
            # 0x42: hash, 0x69: messages, 0x2f: contexts
            if section_type in (0x42, 0x69, 0x2f):
                if payload_end > len(input):
                    raise ValueError("Truncated section in .qm file")
                if section_type == 0x69:
                    # Only the messages payload is needed to build units.
                    messages_start, messages_end = payload_start, payload_end
            startsection = payload_end

        if messages_start is None:
            # No messages section at all: there is nothing to add.
            return
        self._parse_messages(input, messages_start, messages_end)

    def _parse_messages(self, data, start, end):
        """Add one unit per message stored in C{data[start:end]}."""
        pos = start
        source = target = None
        while pos < end:
            subsection, = struct.unpack(">b", data[pos:pos + 1])
            pos += 1

            if subsection == 0x01:  # End: the current message is complete
                if source is None or target is None:
                    raise ValueError("Old .qm format with no source defined")
                newunit = self.addsourceunit(source)
                newunit.target = target
                source = target = None
                continue

            if subsection == 0x05:  # hash: fixed 4-byte value, not needed
                pos += 4
                continue

            if subsection not in (0x03, 0x06, 0x07, 0x08):
                # Deprecated or unrecognised tags are not implemented:
                # report the tag and stop parsing.
                if subsection == 0x02:
                    subsection_name = "SourceText16"
                elif subsection == 0x04:
                    subsection_name = "Context16"
                else:
                    subsection_name = "Unknown"
                sys.stderr.write("Unimplemented: %s %s\n"
                                 % (subsection, subsection_name))
                return

            # The remaining tags carry a signed 4-byte length and a payload.
            length, = struct.unpack(">l", data[pos:pos + 4])
            pos += 4

            if subsection == 0x03:  # Translation
                if length == -1:
                    # A length of -1 carries no payload bytes.
                    target = ""
                    continue
                raw, = struct.unpack(">%ds" % length, data[pos:pos + length])
                string, _consumed = codecs.utf_16_be_decode(raw)
                if target:
                    # A further translation for the same message is appended
                    # to the strings of the existing target.
                    target.strings.append(string)
                else:
                    target = multistring(string)
            elif subsection == 0x06:  # SourceText
                source = data[pos:pos + length].decode('iso-8859-1')
            # 0x07 (Context) and 0x08 (Disambiguating-comment) payloads are
            # skipped: nothing in the resulting units uses them.
            pos += length