2 # -*- coding: utf-8 -*-
4 # Copyright 2002-2007 Zuza Software Foundation
6 # This file is part of translate.
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 """Classes that hold units of .po files (pounit) or entire files (pofile).
24 Gettext-style .po (or .pot) files are used in translations for KDE, GNOME and
27 This uses libgettextpo from the gettext package. Any version before 0.17 will
28 at least cause some subtle bugs or may not work at all. Developers might want
29 to have a look at gettext-tools/libgettextpo/gettext-po.h from the gettext
30 package for the public API of the library.
33 from translate
.misc
.multistring
import multistring
34 from translate
.storage
import pocommon
35 from translate
.misc
import quote
36 from translate
.lang
import data
40 import cStringIO
as StringIO
50 """Separator for #: entries"""
55 class po_message(Structure
):
# Callback signatures used by po_xerror_handler.  Each reported message is
# described by the same group of six arguments:
# (message, filename, lineno, column, multiline_p, message_text).
_xerror_message_args = (POINTER(po_message), STRING, c_uint, c_uint, c_int, STRING)
# xerror: severity followed by one message description.
xerror_prototype = CFUNCTYPE(None, c_int, *_xerror_message_args)
# xerror2: severity followed by two message descriptions.
xerror2_prototype = CFUNCTYPE(None, c_int, *(_xerror_message_args * 2))
# Structures (error handler)
class po_xerror_handler(Structure):
    """ctypes structure carrying the two error callbacks, xerror and xerror2."""
    _fields_ = [
        ('xerror', xerror_prototype),
        ('xerror2', xerror2_prototype),
    ]
68 class po_error_handler(Structure
):
70 ('error', CFUNCTYPE(None, c_int
, c_int
, STRING
)),
71 ('error_at_line', CFUNCTYPE(None, c_int
, c_int
, STRING
, c_uint
, STRING
)),
72 ('multiline_warning', CFUNCTYPE(None, STRING
, STRING
)),
73 ('multiline_error', CFUNCTYPE(None, STRING
, STRING
)),
76 # Callback functions for po_xerror_handler
77 def xerror_cb(severity
, message
, filename
, lineno
, column
, multilint_p
, message_text
):
78 print >> sys
.stderr
, "xerror_cb", severity
, message
, filename
, lineno
, column
, multilint_p
, message_text
80 raise ValueError(message_text
)
82 def xerror2_cb(severity
, message1
, filename1
, lineno1
, column1
, multiline_p1
, message_text1
, message2
, filename2
, lineno2
, column2
, multiline_p2
, message_text2
):
83 print >> sys
.stderr
, "xerror2_cb", severity
, message1
, filename1
, lineno1
, column1
, multiline_p1
, message_text1
, message2
, filename2
, lineno2
, column2
, multiline_p2
, message_text2
85 raise ValueError(message_text1
)
91 # 'gettextpo' is recognised on Unix, while only 'libgettextpo' is recognised on
92 # windows. Therefore we test both.
93 names
= ['gettextpo', 'libgettextpo']
95 lib_location
= ctypes
.util
.find_library(name
)
97 gpo
= cdll
.LoadLibrary(lib_location
)
101 # Now we are getting desperate, so let's guess a unix type DLL that might
102 # be in LD_LIBRARY_PATH or loaded with LD_PRELOAD
104 gpo
= cdll
.LoadLibrary('libgettextpo.so')
106 raise ImportError("gettext PO library not found")
108 # Setup return and parameter types
# File access
# po_file_read_v3 is called as (filename, handler) and po_file_write_v2 as
# (file, filename, handler) elsewhere in this module.
gpo.po_file_read_v3.argtypes = [STRING, POINTER(po_xerror_handler)]
gpo.po_file_write_v2.argtypes = [c_int, STRING, POINTER(po_xerror_handler)]
# ctypes reads the attribute named "restype".  The earlier spelling "retype"
# merely attached an unused attribute to the function object, so the
# declaration never took effect.
gpo.po_file_write_v2.restype = c_int
# Header accessors: both functions return C strings.
for _header_func in ('po_file_domain_header', 'po_header_field'):
    getattr(gpo, _header_func).restype = STRING
# po_header_field takes two string arguments.
gpo.po_header_field.argtypes = [STRING] * 2
# Locations (filepos)
gpo.po_message_add_filepos.argtypes = [c_int, STRING, c_int]
gpo.po_message_filepos.argtypes = [c_int] * 2
gpo.po_message_filepos.restype = c_int
gpo.po_filepos_file.restype = STRING
# Message (get methods)
# Every getter listed here is declared to return a C string.
for _getter in (
    'po_message_comments',
    'po_message_extracted_comments',
    'po_message_prev_msgctxt',
    'po_message_prev_msgid',
    'po_message_prev_msgid_plural',
    'po_message_msgctxt',
    'po_message_msgid',
    'po_message_msgid_plural',
    'po_message_msgstr',
    'po_message_msgstr_plural',
):
    getattr(gpo, _getter).restype = STRING
# The format check is the one getter declared to return an integer.
gpo.po_message_is_format.restype = c_int
# Message (set methods)
# The string setters share the (message, text) argument layout.
for _setter in (
    'po_message_set_comments',
    'po_message_set_extracted_comments',
    'po_message_set_msgctxt',
):
    getattr(gpo, _setter).argtypes = [c_int, STRING]
# The fuzzy setter takes (message, flag).
gpo.po_message_set_fuzzy.argtypes = [c_int, c_int]
# Setup the po_xerror_handler
# Wrap both Python callbacks in their ctypes prototypes and store them in the
# handler's fields (xerror first, xerror2 second, matching _fields_ order).
xerror_handler = po_xerror_handler(
    xerror_prototype(xerror_cb),
    xerror2_prototype(xerror2_cb),
)
def escapeforpo(text):
    """Return the result of pypo.escapeforpo for text."""
    escaped = pypo.escapeforpo(text)
    return escaped
def quoteforpo(text):
    """Return the result of pypo.quoteforpo for text."""
    quoted = pypo.quoteforpo(text)
    return quoted
def unquotefrompo(postr, joinwithlinebreak=False):
    """Return the result of pypo.unquotefrompo for postr.

    joinwithlinebreak is forwarded unchanged as the second positional
    argument.
    """
    unquoted = pypo.unquotefrompo(postr, joinwithlinebreak)
    return unquoted
def encodingToUse(encoding):
    """Return the result of pypo.encodingToUse for encoding."""
    chosen = pypo.encodingToUse(encoding)
    return chosen
161 class pounit(pocommon
.pounit
):
162 def __init__(self
, source
=None, encoding
='utf-8', gpo_message
=None):
163 self
._encoding
= encoding
165 self
._gpo
_message
= gpo
.po_message_create()
166 if source
or source
== "":
170 self
._gpo
_message
= gpo_message
172 def setmsgidcomment(self
, msgidcomment
):
174 newsource
= "_: " + msgidcomment
+ "\n" + self
.source
175 self
.source
= newsource
176 msgidcomment
= property(None, setmsgidcomment
)
def setmsgid_plural(self, msgid_plural):
    """Store msgid_plural on the underlying gettext message.

    A list is concatenated into one string first; any other value is handed
    to po_message_set_msgid_plural unchanged.
    """
    plural_text = "".join(msgid_plural) if isinstance(msgid_plural, list) else msgid_plural
    gpo.po_message_set_msgid_plural(self._gpo_message, plural_text)
# Write-only property: no getter is supplied.
msgid_plural = property(None, setmsgid_plural)
185 def remove_msgid_comments(text
):
188 if text
.startswith("_:"):
189 remainder
= re
.search(r
"_: .*\n(.*)", text
)
191 return remainder
.group(1)
196 singular
= remove_msgid_comments(gpo
.po_message_msgid(self
._gpo
_message
))
198 multi
= multistring(singular
, self
._encoding
)
200 pluralform
= gpo
.po_message_msgid_plural(self
._gpo
_message
)
201 if isinstance(pluralform
, str):
202 pluralform
= pluralform
.decode(self
._encoding
)
203 multi
.strings
.append(pluralform
)
208 def setsource(self
, source
):
209 if isinstance(source
, multistring
):
210 source
= source
.strings
211 if isinstance(source
, unicode):
212 source
= source
.encode(self
._encoding
)
213 if isinstance(source
, list):
214 gpo
.po_message_set_msgid(self
._gpo
_message
, str(source
[0]))
216 gpo
.po_message_set_msgid_plural(self
._gpo
_message
, str(source
[1]))
218 gpo
.po_message_set_msgid(self
._gpo
_message
, source
)
219 gpo
.po_message_set_msgid_plural(self
._gpo
_message
, None)
221 source
= property(getsource
, setsource
)
227 plural
= gpo
.po_message_msgstr_plural(self
._gpo
_message
, nplural
)
229 plurals
.append(plural
)
231 plural
= gpo
.po_message_msgstr_plural(self
._gpo
_message
, nplural
)
233 multi
= multistring(plurals
, encoding
=self
._encoding
)
235 multi
= multistring(u
"")
237 multi
= multistring(gpo
.po_message_msgstr(self
._gpo
_message
) or u
"", encoding
=self
._encoding
)
240 def settarget(self
, target
):
241 # for plural strings: convert 'target' into a list
243 if isinstance(target
, multistring
):
244 target
= target
.strings
245 elif isinstance(target
, basestring
):
247 # for non-plurals: check number of items in 'target'
248 elif isinstance(target
,(dict, list)):
252 raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target
), target
))
253 # empty the previous list of messages
254 # TODO: the "pypo" implementation does not remove the previous items of
255 # the target, if self.target == target (essentially: comparing only
256 # the first item of a plural string with the single new string)
257 # Maybe this behaviour should be unified.
258 if isinstance(target
, (dict, list)):
260 message
= gpo
.po_message_msgstr_plural(self
._gpo
_message
, i
)
261 while message
is not None:
262 gpo
.po_message_set_msgstr_plural(self
._gpo
_message
, i
, None)
264 message
= gpo
.po_message_msgstr_plural(self
._gpo
_message
, i
)
265 # add the items of a list
266 if isinstance(target
, list):
267 for i
in range(len(target
)):
268 targetstring
= target
[i
]
269 if isinstance(targetstring
, unicode):
270 targetstring
= targetstring
.encode(self
._encoding
)
271 gpo
.po_message_set_msgstr_plural(self
._gpo
_message
, i
, targetstring
)
272 # add the values of a dict
273 elif isinstance(target
, dict):
274 for i
, targetstring
in enumerate(target
.itervalues()):
275 gpo
.po_message_set_msgstr_plural(self
._gpo
_message
, i
, targetstring
)
276 # add a single string
278 if isinstance(target
, unicode):
279 target
= target
.encode(self
._encoding
)
281 gpo
.po_message_set_msgstr(self
._gpo
_message
, "")
283 gpo
.po_message_set_msgstr(self
._gpo
_message
, target
)
284 target
= property(gettarget
, settarget
)
287 """The unique identifier for this unit according to the conventions in
289 id = gpo
.po_message_msgid(self
._gpo
_message
)
290 # Gettext does not consider the plural to determine duplicates, only
291 # the msgid. For generation of .mo files, we might want to use this
292 # code to generate the entry for the hash table, but for now, it is
293 # commented out for conformance to gettext.
294 # plural = gpo.po_message_msgid_plural(self._gpo_message)
295 # if not plural is None:
296 # id = '%s\0%s' % (id, plural)
297 context
= gpo
.po_message_msgctxt(self
._gpo
_message
)
299 id = "%s\04%s" % (context
, id)
302 def getnotes(self
, origin
=None):
304 comments
= gpo
.po_message_comments(self
._gpo
_message
) + \
305 gpo
.po_message_extracted_comments(self
._gpo
_message
)
306 elif origin
== "translator":
307 comments
= gpo
.po_message_comments(self
._gpo
_message
)
308 elif origin
in ["programmer", "developer", "source code"]:
309 comments
= gpo
.po_message_extracted_comments(self
._gpo
_message
)
311 raise ValueError("Comment type not valid")
312 # FIXME this fixes a bug in Gettext that returns leading space with comments
314 comments
= "\n".join([line
.strip() for line
in comments
.split("\n")])
315 # Let's drop the last newline
316 return comments
[:-1].decode(self
._encoding
)
318 def addnote(self
, text
, origin
=None, position
="append"):
319 # ignore empty strings and strings without non-space characters
320 if (not text
) or (not text
.strip()):
322 text
= data
.forceunicode(text
)
323 oldnotes
= self
.getnotes(origin
)
326 if position
== "append":
327 newnotes
= oldnotes
+ "\n" + text
328 elif position
== "merge":
330 oldnoteslist
= oldnotes
.split("\n")
331 for newline
in text
.split("\n"):
332 newline
= newline
.rstrip()
333 # avoid duplicate comment lines (this might cause some problems)
334 if newline
not in oldnotes
or len(newline
) < 5:
335 oldnoteslist
.append(newline
)
336 newnotes
= "\n".join(oldnoteslist
)
338 newnotes
= text
+ '\n' + oldnotes
340 newnotes
= "\n".join([line
.rstrip() for line
in text
.split("\n")])
341 # FIXME; workaround the need for leading spaces when adding comments to PO files in libgettextpo
344 for line
in newnotes
.split("\n"):
346 newlines
.append(" " + line
)
348 newlines
.append(line
)
349 newnotes
= "\n".join(newlines
)
350 if origin
in ["programmer", "developer", "source code"]:
351 gpo
.po_message_set_extracted_comments(self
._gpo
_message
, newnotes
)
353 gpo
.po_message_set_comments(self
._gpo
_message
, newnotes
)
def removenotes(self):
    """Replace the comments of this unit's message with an empty string.

    Only po_message_set_comments is called here; extracted comments are not
    modified by this method.
    """
    blank = ""
    gpo.po_message_set_comments(self._gpo_message, blank)
359 newpo
= self
.__class
__()
360 newpo
._gpo
_message
= self
._gpo
_message
363 def merge(self
, otherpo
, overwrite
=False, comments
=True, authoritative
=False):
364 """Merges the otherpo (with the same msgid) into this one.
366 Overwrite non-blank self.msgstr only if overwrite is True
367 merge comments only if comments is True
371 if not isinstance(otherpo
, pounit
):
372 super(pounit
, self
).merge(otherpo
, overwrite
, comments
)
375 self
.addnote(otherpo
.getnotes("translator"), origin
="translator", position
="merge")
376 # FIXME mergelists(self.typecomments, otherpo.typecomments)
377 if not authoritative
:
378 # We don't bring across otherpo.automaticcomments as we consider ourself
379 # to be the authority. Same applies to otherpo.msgidcomments
380 self
.addnote(otherpo
.getnotes("developer"), origin
="developer", position
="merge")
381 self
.msgidcomment
= otherpo
._extract
_msgidcomments
() or None
382 self
.addlocations(otherpo
.getlocations())
383 if not self
.istranslated() or overwrite
:
384 # Remove kde-style comments from the translation (if any).
385 if self
._extract
_msgidcomments
(otherpo
.target
):
386 otherpo
.target
= otherpo
.target
.replace('_: ' + otherpo
._extract
_msgidcomments
()+ '\n', '')
387 self
.target
= otherpo
.target
388 if self
.source
!= otherpo
.source
:
391 self
.markfuzzy(otherpo
.isfuzzy())
392 elif not otherpo
.istranslated():
393 if self
.source
!= otherpo
.source
:
396 if self
.target
!= otherpo
.target
:
400 #return self.source == u"" and self.target != u""
401 # we really want to make sure that there is no msgidcomment or msgctxt
402 return self
.getid() == "" and len(self
.target
) > 0
405 return len(self
.source
) == 0 and len(self
.target
) == 0
def hastypecomment(self, typecomment):
    """Return what po_message_is_format reports for typecomment on this unit."""
    message = self._gpo_message
    return gpo.po_message_is_format(message, typecomment)
410 def hasmarkedcomment(self
, commentmarker
):
411 commentmarker
= "(%s)" % commentmarker
412 for comment
in self
.getnotes("translator").split("\n"):
413 if comment
.startswith(commentmarker
):
def istranslated(self):
    """Tell whether the unit counts as translated.

    The base class decides first; when it answers false that answer is
    returned as is.  Otherwise the unit is translated only if it is not
    obsolete.
    """
    translated = super(pounit, self).istranslated()
    if not translated:
        return translated
    return not self.isobsolete()
def istranslatable(self):
    """Return True unless the unit is a header, blank, or obsolete."""
    # The checks run in the same order as before and stop at the first hit.
    if self.isheader():
        return False
    if self.isblank():
        return False
    return not self.isobsolete()
424 return gpo
.po_message_is_fuzzy(self
._gpo
_message
)
def markfuzzy(self, present=True):
    """Pass present to po_message_set_fuzzy for this unit's message."""
    message = self._gpo_message
    gpo.po_message_set_fuzzy(message, present)
430 return self
.hasmarkedcomment("review") or self
.hasmarkedcomment("pofilter")
def isobsolete(self):
    """Return what po_message_is_obsolete reports for this unit's message."""
    message = self._gpo_message
    return gpo.po_message_is_obsolete(message)
def makeobsolete(self):
    """Flag this unit's message as obsolete via po_message_set_obsolete."""
    # FIXME: libgettextpo currently does not reset other data, we probably
    # want to do that, but a better solution would be for libgettextpo to
    # output correct data on serialisation.
    message = self._gpo_message
    gpo.po_message_set_obsolete(message, True)
441 gpo
.po_message_set_obsolete(self
._gpo
_message
, False)
444 return gpo
.po_message_msgid_plural(self
._gpo
_message
) is not None
446 def _extract_msgidcomments(self
, text
=None):
447 """Extract KDE style msgid comments from the unit.
450 @return: Returns the extracted msgidcomments found in this unit's msgid.
455 text
= gpo
.po_message_msgid(self
._gpo
_message
)
457 msgidcomment
= re
.search("_: (.*)\n", text
)
459 return msgidcomment
.group(1).decode(self
._encoding
)
467 def getlocations(self
):
470 location
= gpo
.po_message_filepos(self
._gpo
_message
, i
)
472 locname
= gpo
.po_filepos_file(location
)
473 locline
= gpo
.po_filepos_start_line(location
)
477 locstring
= locname
+ ":" + str(locline
)
478 locations
.append(locstring
)
480 location
= gpo
.po_message_filepos(self
._gpo
_message
, i
)
483 def addlocation(self
, location
):
484 for loc
in location
.split():
485 parts
= loc
.split(":")
491 gpo
.po_message_add_filepos(self
._gpo
_message
, file, line
)
493 def getcontext(self
):
494 msgctxt
= gpo
.po_message_msgctxt(self
._gpo
_message
)
495 msgidcomment
= self
._extract
_msgidcomments
()
497 return msgctxt
+ msgidcomment
501 class pofile(pocommon
.pofile
):
503 def __init__(self
, inputfile
=None, encoding
=None, unitclass
=pounit
):
504 self
.UnitClass
= unitclass
505 pocommon
.pofile
.__init
__(self
, unitclass
=unitclass
)
506 self
._gpo
_memory
_file
= None
507 self
._gpo
_message
_iterator
= None
508 self
._encoding
= encodingToUse(encoding
)
509 if inputfile
is not None:
510 self
.parse(inputfile
)
512 self
._gpo
_memory
_file
= gpo
.po_file_create()
513 self
._gpo
_message
_iterator
= gpo
.po_message_iterator(self
._gpo
_memory
_file
, None)
def addunit(self, unit):
    """Add unit to this file.

    The unit's gettext message is inserted through the file's message
    iterator first, and only then is the unit appended to self.units.
    """
    iterator = self._gpo_message_iterator
    gpo.po_message_insert(iterator, unit._gpo_message)
    self.units.append(unit)
519 def removeduplicates(self
, duplicatestyle
="merge"):
520 """make sure each msgid is unique ; merge comments etc from duplicates into original"""
523 # we sometimes need to keep track of what has been marked
524 # TODO: this is using a list as the pos aren't hashable, but this is slow...
526 def addcomment(thepo
):
527 thepo
.msgidcomment
= " ".join(thepo
.getlocations())
528 markedpos
.append(thepo
)
529 for thepo
in self
.units
:
531 uniqueunits
.append(thepo
)
533 if duplicatestyle
.startswith("msgid_comment"):
534 msgid
= thepo
._extract
_msgidcomments
() + thepo
.source
537 if duplicatestyle
== "msgid_comment_all":
539 uniqueunits
.append(thepo
)
540 elif msgid
in msgiddict
:
541 if duplicatestyle
== "merge":
543 msgiddict
[msgid
].merge(thepo
)
546 uniqueunits
.append(thepo
)
547 elif duplicatestyle
== "keep":
548 uniqueunits
.append(thepo
)
549 elif duplicatestyle
== "msgid_comment":
550 origpo
= msgiddict
[msgid
]
551 if origpo
not in markedpos
:
554 uniqueunits
.append(thepo
)
555 elif duplicatestyle
== "msgctxt":
556 origpo
= msgiddict
[msgid
]
557 if origpo
not in markedpos
:
558 gpo
.po_message_set_msgctxt(origpo
._gpo
_message
, " ".join(origpo
.getlocations()))
559 markedpos
.append(thepo
)
560 gpo
.po_message_set_msgctxt(thepo
._gpo
_message
, " ".join(thepo
.getlocations()))
561 uniqueunits
.append(thepo
)
563 if not msgid
and duplicatestyle
!= "keep":
565 msgiddict
[msgid
] = thepo
566 uniqueunits
.append(thepo
)
567 new_gpo_memory_file
= gpo
.po_file_create()
568 new_gpo_message_iterator
= gpo
.po_message_iterator(new_gpo_memory_file
, None)
569 for unit
in uniqueunits
:
570 gpo
.po_message_insert(new_gpo_message_iterator
, unit
._gpo
_message
)
571 gpo
.po_message_iterator_free(self
._gpo
_message
_iterator
)
572 self
._gpo
_message
_iterator
= new_gpo_message_iterator
573 self
._gpo
_memory
_file
= new_gpo_memory_file
574 self
.units
= uniqueunits
577 def obsolete_workaround():
578 # Remove all items that are not output by msgmerge when a unit is obsolete. This is a work
579 # around for bug in libgettextpo
580 # FIXME Do version test in case they fix this bug
581 for unit
in self
.units
:
582 if unit
.isobsolete():
583 gpo
.po_message_set_extracted_comments(unit
._gpo
_message
, "")
584 location
= gpo
.po_message_filepos(unit
._gpo
_message
, 0)
586 gpo
.po_message_remove_filepos(unit
._gpo
_message
, 0)
587 location
= gpo
.po_message_filepos(unit
._gpo
_message
, 0)
589 if self
._gpo
_memory
_file
:
590 obsolete_workaround()
591 f
= tempfile
.NamedTemporaryFile(prefix
='translate', suffix
='.po')
592 self
._gpo
_memory
_file
= gpo
.po_file_write_v2(self
._gpo
_memory
_file
, f
.name
, xerror_handler
)
594 outputstring
= f
.read()
599 """Returns True if the object doesn't contain any translation units."""
600 if len(self
.units
) == 0:
602 # Skip the first unit if it is a header.
603 if self
.units
[0].isheader():
604 units
= self
.units
[1:]
609 if not unit
.isblank() and not unit
.isobsolete():
613 def parse(self
, input):
614 if hasattr(input, 'name'):
615 self
.filename
= input.name
616 elif not getattr(self
, 'filename', ''):
619 if hasattr(input, "read"):
624 needtmpfile
= not os
.path
.isfile(input)
626 # This is not a file - we write the string to a temporary file
627 fd
, fname
= tempfile
.mkstemp(prefix
='translate', suffix
='.po')
632 self
._gpo
_memory
_file
= gpo
.po_file_read_v3(input, xerror_handler
)
633 if self
._gpo
_memory
_file
is None:
634 print >> sys
.stderr
, "Error:"
639 # Handle xerrors here
640 self
._header
= gpo
.po_file_domain_header(self
._gpo
_memory
_file
, None)
642 charset
= gpo
.po_header_field(self
._header
, "Content-Type")
644 charset
= re
.search("charset=([^\\s]+)", charset
).group(1)
645 self
._encoding
= encodingToUse(charset
)
646 self
._gpo
_message
_iterator
= gpo
.po_message_iterator(self
._gpo
_memory
_file
, None)
647 newmessage
= gpo
.po_next_message(self
._gpo
_message
_iterator
)
649 newunit
= pounit(gpo_message
=newmessage
)
650 self
.units
.append(newunit
)
651 newmessage
= gpo
.po_next_message(self
._gpo
_message
_iterator
)
652 self
._free
_iterator
()
655 # We currently disable this while we still get segmentation faults.
656 # Note that this is definitely leaking memory because of this.
658 self
._free
_iterator
()
659 if self
._gpo
_memory
_file
is not None:
660 gpo
.po_file_free(self
._gpo
_memory
_file
)
661 self
._gpo
_memory
_file
= None
663 def _free_iterator(self
):
664 # We currently disable this while we still get segmentation faults.
665 # Note that this is definitely leaking memory because of this.
667 if self
._gpo
_message
_iterator
is not None:
668 gpo
.po_message_iterator_free(self
._gpo
_message
_iterator
)
669 self
._gpo
_message
_iterator
= None