2 # -*- coding: utf-8 -*-
4 # Copyright 2007 Zuza Software Foundation
6 # This file is part of translate.
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 """Module for parsing Qt .qm files
25 @note: based on documentation from Gettext's .qm implementation (see write-qt.c) and on observation
26 of the output of lrelease.
27 @note: Certain deprecated section tags are not implemented. These will break and print out
28 the missing tag. They are easy to implement and should follow the structure in 03
29 (Translation). We could find no examples that use these so we'd rather leave it
30 unimplemented until we actually have test data.
31 @note: Many .qm files are unable to be parsed as they do not have the source text. We assume
32 that since they use a hash table to lookup the data there is actually no need for the
33 source text. It seems however that in Qt4's lrelease all data is included in the resultant .qm
35 @todo: We can only parse, not create, a .qm file. The main issue is that we need to
36 implement the hashing algorithm (which seems to be identical to the Gettext hash algorithm). Unlike
37 Gettext it seems that the hash is required, but that has not been validated.
38 @todo: The code can parse files correctly. But it could be cleaned up to be more readable, especially
39 the part that breaks the file into sections.
42 from translate
.storage
import base
43 from translate
.misc
.multistring
import multistring
# The four big-endian 32-bit words that parse() expects in the first 16 bytes
# of its input. Written without the Python-2-only "L" suffix: plain hex
# literals have the same numeric value and are also valid on Python 3.
QM_MAGIC_NUMBER = (
    0x3CB86418,
    0xCAEF9C95,
    0xCD211CBF,
    0x60A1BDDD,
)
def qmunpack(qmfile='messages.mo'):
    """Helper to unpack Qt .qm files into a Python string

    Reads the file named by qmfile and prints its content on one line, each
    byte written as a backslash, the letter x and two lowercase hex digits,
    so the output can be pasted into a Python string literal.

    @param qmfile: path of the file to dump
    """
    # Binary mode so that no byte is altered or dropped by newline or text
    # decoding; the with-block closes the file even if the read fails.
    with open(qmfile, 'rb') as f:
        s = f.read()
    # bytearray yields integer byte values on both Python 2 and Python 3,
    # which is what the %02x conversions need.
    print("\\x%02x" * len(s) % tuple(bytearray(s)))
class qmunit(base.TranslationUnit):
    """A single translation message taken from a .qm file."""

    def __init__(self, source=None):
        """Create the unit, handing source on to the base class initialiser.

        @param source: the source text of the message, or None
        """
        super(qmunit, self).__init__(source)
62 class qmfile(base
.TranslationStore
):
63 """A class representing a .qm file."""
def __init__(self, inputfile=None, unitclass=qmunit):
    """Set up the store and, when input is supplied, parse it straight away.

    @param inputfile: data handed to parsestring(); nothing is parsed when
        this is None
    @param unitclass: class recorded as UnitClass and passed on to the
        base class initialiser
    """
    self.UnitClass = unitclass
    base.TranslationStore.__init__(self, unitclass=unitclass)
    if inputfile is None:
        return
    self.parsestring(inputfile)
74 """Output a string representation of the .qm data file"""
def parse(self, input):
    """Parse the given file or file source string.

    The data must start with the 16 byte magic number. After it come
    sections, each introduced by a one byte type and a four byte big-endian
    length. Only the messages section (type 0x69) is interpreted; the hash
    (0x42) and contexts (0x2f) sections are stepped over. Inside the
    messages section every message is a run of tagged fields closed by an
    End tag (0x01); each closed message is added to the store with
    addsourceunit() and its target is set to the decoded translation(s).

    @param input: an object with a read() method, or the raw content of a
        .qm file. If it has a name attribute, that becomes self.filename.
    @raise ValueError: if the data does not start with the .qm magic number,
        or if a message ends without both a source text and a translation.
    """
    if hasattr(input, 'name'):
        self.filename = input.name
    elif not getattr(self, 'filename', ''):
        self.filename = ''
    if hasattr(input, "read"):
        # Work on the content from here on; the object is left open because
        # it belongs to the caller.
        input = input.read()

    # Anything shorter than the magic number cannot be a .qm file; report it
    # the same way as a wrong magic number rather than as a struct error.
    if len(input) < 16:
        raise ValueError("This is not a .qm file")
    magic = struct.unpack(">4L", input[:16])
    if magic != QM_MAGIC_NUMBER:
        raise ValueError("This is not a .qm file")

    # Walk the sections that follow the magic number and remember where the
    # messages section lies. With no messages section the range stays empty
    # and no units are added.
    sectionheader = struct.calcsize(">bL")
    startsection = 16
    messages_start = messages_end = 0
    while startsection < len(input):
        section_type, length = struct.unpack(
            ">bL", input[startsection:startsection + sectionheader])
        data_start = startsection + sectionheader
        if section_type == 0x69:  # messages
            messages_start = data_start
            messages_end = data_start + length
        # 0x42 (hash) and 0x2f (contexts) carry nothing that is used here.
        startsection = data_start + length

    pos = messages_start
    source = target = None
    while pos < messages_end:
        subsection, = struct.unpack(">b", input[pos:pos + 1])
        pos += 1

        if subsection == 0x01:  # End
            if source is None or target is None:
                raise ValueError("Old .qm format with no source defined")
            newunit = self.addsourceunit(source)
            newunit.target = target
            source = target = None
            continue

        if subsection == 0x05:  # hash
            # Four bytes of data with no length prefix; the value is unused.
            pos += 4
            continue

        if subsection not in (0x03, 0x06, 0x07, 0x08):
            # The layout of these tags is not implemented, so the position
            # cannot be advanced reliably: report the tag and stop parsing.
            # The message text, including its spelling, is kept unchanged.
            subsection_name = {
                0x02: "SourceText16",
                0x04: "Context16",
            }.get(subsection, "Unkown")
            sys.stderr.write("Unimplemented: %s %s\n"
                             % (subsection, subsection_name))
            return

        # The remaining tags are a signed four byte length and the payload.
        length, = struct.unpack(">l", input[pos:pos + 4])
        pos += 4
        if length < 0:
            # NOTE(review): a negative length presumably marks a null string
            # (Qt serialises a null QString as length 0xFFFFFFFF) -- confirm.
            # It is treated as an empty payload.
            length = 0
        payload = input[pos:pos + length]
        pos += length

        if subsection == 0x03:  # Translation
            # unpack() also verifies that the full payload is present.
            raw, = struct.unpack(">%ds" % length, payload)
            string, templen = codecs.utf_16_be_decode(raw)
            if isinstance(target, multistring):
                # A further translation of the same message: keep it as an
                # extra string of the existing target.
                target.strings.append(string)
            else:
                target = multistring(string)
        elif subsection == 0x06:  # SourceText
            source = payload.decode('iso-8859-1')
        # 0x07 (Context) and 0x08 (Disambiguating-comment) are read past;
        # their values are not stored on the unit.