2 # -*- coding: utf-8 -*-
4 # Copyright 2005-2007 Zuza Software Foundation
6 # This file is part of translate.
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""Module for handling XLIFF files for translation.

The official recommendation is to use the extension .xlf for XLIFF files.
"""
28 from translate
.storage
import base
29 from translate
.storage
import lisa
30 from lxml
import etree
32 # TODO: handle translation types
class xliffunit(lisa.LISAunit):
    """A single term (one <trans-unit> element) in the xliff file.

    NOTE(review): the reviewed copy of this class had lost a number of
    statements (initialisers, returns, guards and a few C{def} lines).  They
    were restored from what the surrounding code requires; spots where the
    restoration is an assumption are marked with NOTE(review).
    """

    rootNode = "trans-unit"
    languageNode = "source"
    namespace = 'urn:oasis:names:tc:xliff:document:1.1'

    #TODO: id and all the trans-unit level stuff

    def createlanguageNode(self, lang, text, purpose):
        """Returns an xml Element setup with given parameters.

        @param lang: Language code.  Currently unused, see the TODOs below.
        @param text: Text to store in the new element.
        @param purpose: Local tag name of the element to create; it is
        qualified with L{namespaced} before use.
        @return: The new element (not attached to any parent).
        """
        #TODO: for now we do source, but we have to test if it is target,
        # perhaps with parameter. Alternatively, we can use lang, if supplied,
        # since an xliff file has to conform to the bilingual nature promised
        # by the header.
        langset = etree.Element(self.namespaced(purpose))
        #TODO: set the language with lisa.setXMLlang(langset, lang)
        #TODO: placeholder support via self.createPHnodes(langset, text)
        langset.text = text
        return langset

    def getlanguageNodes(self):
        """We override this to get source and target nodes.

        @return: A list with source and target nodes interleaved as
        [source, target, source, target, ...].  Sources that have no
        matching target are appended at the end.
        """
        sources = self.xmlelement.findall(
            './/%s' % self.namespaced(self.languageNode))
        targets = self.xmlelement.findall('.//%s' % self.namespaced('target'))
        nodes = []
        for source, target in zip(sources, targets):
            nodes.append(source)
            nodes.append(target)
        # zip() stops at the shorter list, so add the unpaired sources.
        if len(sources) > len(targets):
            nodes.extend(sources[len(targets):])
        return nodes

    def addalttrans(self, txt, origin=None, lang=None):
        """Adds an alt-trans tag and alt-trans components to the unit.

        @param txt: Alternative translation of the source text.  Byte
        strings are decoded as UTF-8.
        @param origin: Optional value for the "origin" attribute.
        @param lang: Optional language to set on the alt-trans element.
        """
        #TODO: support adding a source tag and match quality attribute. The
        # source tag is needed to inject fuzzy matches from a TM.
        # bytes is str on Python 2, so this keeps the old behaviour there
        # while not calling .decode() on text strings under Python 3.
        if isinstance(txt, bytes):
            txt = txt.decode("utf-8")
        alttrans = etree.SubElement(self.xmlelement,
                                    self.namespaced("alt-trans"))
        alttarget = etree.SubElement(alttrans, self.namespaced("target"))
        alttarget.text = txt
        # Attributes cannot be set to None, so only write what was supplied.
        if origin:
            alttrans.set("origin", origin)
        if lang:
            lisa.setXMLlang(alttrans, lang)

    def getalttrans(self, origin=None):
        """Returns <alt-trans> for the given origin as a list of units. No
        origin means all alternatives.

        @param origin: Origin to filter on, or None for every alternative.
        @return: A list of L{base.TranslationUnit} objects, each carrying the
        matching alt-trans element in its C{xmlelement} attribute.
        """
        translist = []
        alt_path = ".//%s" % self.namespaced("alt-trans")
        source_path = ".//%s" % self.namespaced("source")
        target_path = ".//%s" % self.namespaced("target")
        for node in self.xmlelement.findall(alt_path):
            if not self.correctorigin(node, origin):
                continue
            # We build some mini units that keep the xmlelement. This
            # makes it easier to delete it if it is passed back to us.
            newunit = base.TranslationUnit(self.source)

            # the source tag is optional
            sourcenode = node.find(source_path)
            if sourcenode is not None:
                newunit.source = lisa.getText(sourcenode)

            # must have one or more targets
            targetnode = node.find(target_path)
            newunit.target = lisa.getText(targetnode)
            #TODO: support multiple targets better
            #TODO: support notes in alt-trans
            newunit.xmlelement = node

            translist.append(newunit)
        return translist

    def delalttrans(self, alternative):
        """Removes the supplied alternative from the list of alt-trans tags.

        @param alternative: A unit as returned by L{getalttrans}; its
        C{xmlelement} is removed from this unit's element.
        """
        self.xmlelement.remove(alternative.xmlelement)

    def addnote(self, text, origin=None):
        """Add a note specifically in a "note" tag.

        @param text: Note text; byte strings are decoded as UTF-8 and
        surrounding whitespace is stripped.
        @param origin: Optional value for the note's "from" attribute.
        """
        if isinstance(text, bytes):
            text = text.decode("utf-8")
        note = etree.SubElement(self.xmlelement, self.namespaced("note"))
        note.text = text.strip()
        if origin:
            note.set("from", origin)

    def getnotelist(self, origin=None):
        """Private method that returns the text from notes matching 'origin'
        or all notes.

        @return: The note texts in document order with duplicates removed.
        """
        notenodes = self.xmlelement.findall(".//%s" % self.namespaced("note"))
        # TODO: consider using xpath to construct the list directly or to
        # simply get the correct text from the outset (just remember to
        # check for duplication).
        notelist = []
        seen = set()
        for notenode in notenodes:
            if not self.correctorigin(notenode, origin):
                continue
            text = lisa.getText(notenode)
            # Keep only the first occurrence of each text.
            if text not in seen:
                seen.add(text)
                notelist.append(text)
        return notelist

    def getnotes(self, origin=None):
        """Returns the notes matching 'origin' (or all notes) joined with
        newlines."""
        return '\n'.join(self.getnotelist(origin=origin))

    def removenotes(self):
        """Remove all the translator notes."""
        notes = self.xmlelement.findall(".//%s" % self.namespaced("note"))
        for note in notes:
            if self.correctorigin(note, origin="translator"):
                # The search above is recursive, so a note may be nested
                # (e.g. inside alt-trans).  Removing through the real parent
                # avoids the ValueError that self.xmlelement.remove() raises
                # for elements that are not its direct children.
                note.getparent().remove(note)

    def adderror(self, errorname, errortext):
        """Adds an error message to this unit.

        The message is stored as a note of origin "pofilter" in the form
        "errorname: errortext".
        """
        #TODO: consider factoring out: some duplication between XLIFF and TMX
        text = errorname + ': ' + errortext
        self.addnote(text, origin="pofilter")

    # NOTE(review): method name inferred from adderror(); confirm.
    def geterrors(self):
        """Get all error messages.

        @return: A dictionary mapping error name to error text, built from
        the notes of origin "pofilter".
        """
        #TODO: consider factoring out: some duplication between XLIFF and TMX
        errordict = {}
        for note in self.getnotelist(origin="pofilter"):
            # Split on the first separator only: the error text itself may
            # contain ': '.  A note without a separator yields empty text.
            parts = note.split(': ', 1)
            errortext = ""
            if len(parts) == 2:
                errortext = parts[1]
            errordict[parts[0]] = errortext
        return errordict

    def isapproved(self):
        """States whether this unit is approved."""
        return self.xmlelement.get("approved") == "yes"

    def markapproved(self, value=True):
        """Mark this unit as approved.

        @param value: True writes approved="yes", False writes approved="no".
        """
        # NOTE(review): the branch conditions were missing from the reviewed
        # copy; a plain if/else on 'value' is assumed.
        if value:
            self.xmlelement.set("approved", "yes")
        else:
            self.xmlelement.set("approved", "no")

    # NOTE(review): method name inferred from markreviewneeded(); confirm.
    def isreview(self):
        """States whether this unit needs to be reviewed."""
        targetnode = self.getlanguageNode(lang=None, index=1)
        if targetnode is None:
            return False
        return "needs-review" in targetnode.get("state", "")

    def markreviewneeded(self, needsreview=True, explanation=None):
        """Marks the unit to indicate whether it needs review. Adds an
        optional explanation as a note.

        Nothing happens when the unit has no target node.
        """
        targetnode = self.getlanguageNode(lang=None, index=1)
        if targetnode is None:
            return
        if needsreview:
            targetnode.set("state", "needs-review-translation")
            if explanation:
                self.addnote(explanation, origin="translator")
        elif "state" in targetnode.attrib:
            # Guarded so that clearing an unmarked unit is not a KeyError.
            del targetnode.attrib["state"]

    # NOTE(review): method name inferred from the self.isfuzzy() calls in
    # marktranslated() and merge(); confirm.
    def isfuzzy(self):
        """States whether this unit is fuzzy, i.e. not approved."""
        # Fuzziness is derived from the "approved" attribute and not from
        # the state / state-qualifier attributes of the target node.
        return not self.isapproved()

    def markfuzzy(self, value=True):
        """Marks the unit as fuzzy (unapproved) or clears that marking.

        Besides the approved flag, the target node (if present) gets
        state="needs-review-translation" when marking, and loses its
        "state" and "state-qualifier" attributes when clearing.
        """
        self.markapproved(not value)
        targetnode = self.getlanguageNode(lang=None, index=1)
        if targetnode is None:
            return
        if value:
            targetnode.set("state", "needs-review-translation")
        else:
            for attribute in ["state", "state-qualifier"]:
                if attribute in targetnode.attrib:
                    del targetnode.attrib[attribute]

    def settarget(self, text, lang='xx', append=False):
        """Sets the target string to the given value."""
        super(xliffunit, self).settarget(text, lang, append)
        # NOTE(review): guard restored on the assumption that an empty
        # target must not be flagged as translated; confirm.
        if self.target:
            self.marktranslated()

    # An istranslated() based on state="translated" is deliberately not
    # provided: it would almost always return False, and leaving it out lets
    # pocount, etc. work well.

    def istranslatable(self):
        """States whether the unit may be translated, i.e. its "translate"
        attribute is not "no" (case-insensitive)."""
        value = self.xmlelement.get("translate")
        if value and value.lower() == 'no':
            return False
        return True

    def marktranslated(self):
        """Sets state="translated" on the target node, if there is one."""
        targetnode = self.getlanguageNode(lang=None, index=1)
        if targetnode is None:
            return
        if self.isfuzzy() and "state-qualifier" in targetnode.attrib:
            #TODO: consider
            del targetnode.attrib["state-qualifier"]
        targetnode.set("state", "translated")

    # NOTE(review): header inferred from the unit.setid() call in
    # xlifffile.addsourceunit() and from the body's use of 'id'.
    def setid(self, id):
        """Sets the "id" attribute of the trans-unit."""
        self.xmlelement.set("id", id)

    # NOTE(review): header inferred from the self.getid() call in
    # getlocations().
    def getid(self):
        """Returns the "id" attribute of the trans-unit, or "" if unset."""
        return self.xmlelement.get("id") or ""

    def addlocation(self, location):
        """Records the location of this unit."""
        # NOTE(review): body was missing; storing the location as the id
        # mirrors getlocations() below -- confirm.
        self.setid(location)

    def getlocations(self):
        """Returns a one-element list holding the unit's id."""
        return [self.getid()]

    def createcontextgroup(self, name, contexts=None, purpose=None):
        """Add the context group to the trans-unit with contexts a list with
        (type, text) tuples describing each context.

        @param name: Value for the group's "name" attribute.
        @param contexts: Non-empty list of (context-type, text) tuples; byte
        string texts are decoded as UTF-8.
        @param purpose: Optional value for the group's "purpose" attribute.
        @raise ValueError: If no contexts are given.
        """
        # Validate before touching the tree so a bad call leaves no empty
        # context-group element behind.
        if not contexts:
            raise ValueError("at least one (type, text) context is required")
        group = etree.SubElement(self.xmlelement,
                                 self.namespaced("context-group"))
        group.set("name", name)
        if purpose:
            group.set("purpose", purpose)
        for context_type, text in contexts:
            if isinstance(text, bytes):
                text = text.decode("utf-8")
            context = etree.SubElement(group, self.namespaced("context"))
            context.text = text
            context.set("context-type", context_type)

    def getcontextgroups(self, name):
        """Returns the contexts in the context groups with the specified name.

        @return: A list with one entry per matching group; each entry is a
        list of (context-type, text) tuples.
        """
        groups = []
        group_path = ".//%s" % self.namespaced("context-group")
        context_path = ".//%s" % self.namespaced("context")
        for group in self.xmlelement.findall(group_path):
            if group.get("name") != name:
                continue
            pairs = [(context.get("context-type"), lisa.getText(context))
                     for context in group.findall(context_path)]
            # append, not extend: the grouping must be preserved
            groups.append(pairs)
        return groups

    def getrestype(self):
        """Returns the restype attribute in the trans-unit tag."""
        return self.xmlelement.get("restype")

    def merge(self, otherunit, overwrite=False, comments=True):
        """Merges otherunit into this unit via the parent class, then updates
        the translated / fuzzy markings of this unit."""
        #TODO: consider other attributes like "approved"
        super(xliffunit, self).merge(otherunit, overwrite, comments)
        # NOTE(review): the guard and the markfuzzy() call were missing from
        # the reviewed copy and are restored by assumption; confirm.
        if self.target:
            self.marktranslated()
            if otherunit.isfuzzy():
                self.markfuzzy()

    def correctorigin(self, node, origin):
        """Check against node tag's origin (e.g note or alt-trans).

        @param node: Element whose "from" and "origin" attributes are tested.
        @param origin: Origin to look for; None matches every node.
        @return: True if origin is None or is contained in either attribute.
        """
        if origin is None:
            return True
        if origin in node.get("from", ""):
            return True
        return origin in node.get("origin", "")
class xlifffile(lisa.LISAfile):
    """Class representing a XLIFF file store.

    NOTE(review): the reviewed copy of this class had lost a number of
    statements (initialisers, returns, guards and a few C{def} lines).  They
    were restored from what the surrounding code requires; spots where the
    restoration is an assumption are marked with NOTE(review).
    """
    UnitClass = xliffunit
    # bodyNode is read by createfilenode(); both names match the skeleton.
    rootNode = "xliff"
    bodyNode = "body"
    XMLskeleton = '''<?xml version="1.0" ?>
<xliff version='1.1' xmlns='urn:oasis:names:tc:xliff:document:1.1'>
 <file original='NoName' source-language='en' datatype='plaintext'>
  <body>
  </body>
 </file>
</xliff>'''
    namespace = 'urn:oasis:names:tc:xliff:document:1.1'

    def __init__(self, *args, **kwargs):
        """Sets up the store and picks up the languages declared in the
        first <file> element, if any."""
        lisa.LISAfile.__init__(self, *args, **kwargs)
        self._filename = "NoName"
        # Counter used by addsourceunit() to hand out unit ids.
        self._messagenum = 0

        # Allow the inputfile to override defaults for source and target
        # language.
        filenode = self.document.find('.//%s' % self.namespaced('file'))
        if filenode is not None:
            sourcelanguage = filenode.get('source-language')
            if sourcelanguage:
                self.setsourcelanguage(sourcelanguage)
            targetlanguage = filenode.get('target-language')
            if targetlanguage:
                self.settargetlanguage(targetlanguage)

    # NOTE(review): the def line was missing; the name is assumed to be the
    # header hook of the parent class -- confirm against lisa.LISAfile.
    def addheader(self):
        """Initialise the file header."""
        filenode = self.document.find(self.namespaced("file"))
        filenode.set("source-language", self.sourcelanguage)
        if self.targetlanguage:
            filenode.set("target-language", self.targetlanguage)

    def createfilenode(self, filename, sourcelanguage=None,
                       targetlanguage=None, datatype='plaintext'):
        """Creates a filenode with the given filename. All parameters are
        needed for XLIFF compliance.

        Languages that are not given fall back to those of the store.  The
        node gets an empty body and is returned without being attached to
        the document.
        """
        self.removedefaultfile()
        if sourcelanguage is None:
            sourcelanguage = self.sourcelanguage
        if targetlanguage is None:
            targetlanguage = self.targetlanguage
        filenode = etree.Element(self.namespaced("file"))
        filenode.set("original", filename)
        filenode.set("source-language", sourcelanguage)
        if targetlanguage:
            filenode.set("target-language", targetlanguage)
        filenode.set("datatype", datatype)
        etree.SubElement(filenode, self.namespaced(self.bodyNode))
        return filenode

    def getfilename(self, filenode):
        """Returns the name of the given file."""
        return filenode.get("original")

    def getfilenames(self):
        """Returns all filenames in this XLIFF file.

        @return: A list of the non-empty "original" attributes of the file
        nodes.
        """
        filenodes = self.document.findall(self.namespaced("file"))
        # Built as a real list: filter() returns an iterator on Python 3,
        # which breaks len() and indexing in callers such as getdatatype().
        names = [self.getfilename(filenode) for filenode in filenodes]
        return [name for name in names if name]

    def getfilenode(self, filename):
        """Finds the filenode with the given name.

        @return: The first matching file element, or None.
        """
        for filenode in self.document.findall(self.namespaced("file")):
            if self.getfilename(filenode) == filename:
                return filenode
        return None

    def getdatatype(self, filename=None):
        """Returns the datatype of the stored file. If no filename is given,
        the datatype of the first file is given.

        @return: The "datatype" attribute, or "" when it cannot be
        determined (NOTE(review): fallback value assumed; confirm).
        """
        if filename:
            node = self.getfilenode(filename)
            if node is not None:
                return node.get("datatype")
        else:
            filenames = self.getfilenames()
            if len(filenames) > 0 and filenames[0] != "NoName":
                return self.getdatatype(filenames[0])
        return ""

    def removedefaultfile(self):
        """We want to remove the default file-tag as soon as possible if we
        know it is still present and empty."""
        filenodes = self.document.findall(self.namespaced("file"))
        if len(filenodes) <= 1:
            # The only file node is never removed.
            return
        unit_path = ".//%s" % self.namespaced(self.UnitClass.rootNode)
        for filenode in filenodes:
            if (filenode.get("original") == "NoName"
                    and not filenode.findall(unit_path)):
                self.document.getroot().remove(filenode)
                # The skeleton only ever contributes one default node.
                break

    def getheadernode(self, filenode, createifmissing=False):
        """Finds the header node for the given filenode.

        @return: The header element, a newly created one if it was missing
        and createifmissing is set, otherwise None.
        """
        # find() returns None when there is no header.  The element is
        # returned as is: wrapping it in list() raised TypeError for a
        # missing header, so createifmissing could never take effect.
        headernode = filenode.find(self.namespaced("header"))
        if headernode is not None:
            return headernode
        if not createifmissing:
            return None
        return etree.SubElement(filenode, self.namespaced("header"))

    def getbodynode(self, filenode, createifmissing=False):
        """Finds the body node for the given filenode.

        @return: The body element, a newly created one if it was missing
        and createifmissing is set, otherwise None.
        """
        bodynode = filenode.find(self.namespaced("body"))
        if bodynode is not None:
            return bodynode
        if not createifmissing:
            return None
        return etree.SubElement(filenode, self.namespaced("body"))

    def addsourceunit(self, source, filename="NoName", createifmissing=False):
        """Adds the given trans-unit to the last used body node. If the
        filename has changed it uses the slow method instead (will create
        the nodes required if asked).

        @return: The new unit, or None if the file could not be selected.
        """
        if self._filename != filename:
            if not self.switchfile(filename, createifmissing):
                return None
        unit = super(xlifffile, self).addsourceunit(source)
        self._messagenum += 1
        unit.setid("%d" % self._messagenum)
        lisa.setXMLspace(unit.xmlelement, "preserve")
        return unit

    def switchfile(self, filename, createifmissing=False):
        """Makes the body of the given file the current one (will create the
        nodes required if asked). Returns success."""
        self._filename = filename
        filenode = self.getfilenode(filename)
        if filenode is None:
            if not createifmissing:
                return False
            filenode = self.createfilenode(filename)
            self.document.getroot().append(filenode)

        self.body = self.getbodynode(filenode,
                                     createifmissing=createifmissing)
        if self.body is None:
            return False
        # Continue numbering after the units already present in this body.
        self._messagenum = len(
            self.body.findall(".//%s" % self.namespaced("trans-unit")))
        #TODO: was 0 based before - consider
        #TODO: we want to number them consecutively inside a body/file tag
        # instead of globally in the whole XLIFF file, but using
        # len(self.units) would be an alternative to consider
        return True

    def creategroup(self, filename="NoName", createifmissing=False,
                    restype=None):
        """Adds a group tag into the specified file.

        @return: The new group element, or None if the file could not be
        selected.
        """
        if self._filename != filename:
            if not self.switchfile(filename, createifmissing):
                return None
        group = etree.SubElement(self.body, self.namespaced("group"))
        if restype:
            group.set("restype", restype)
        return group

    # Header restored from the super().__str__() call in the body.
    def __str__(self):
        """Serialises the store after dropping an unused default file node."""
        self.removedefaultfile()
        return super(xlifffile, self).__str__()

    @classmethod
    def parsestring(cls, storestring):
        """Parses the string to return the correct file object.

        If the first unit or the datatype marks the content as PO based, the
        string is parsed again as a PoXliffFile.
        """
        xliff = super(xlifffile, cls).parsestring(storestring)
        if not xliff.units:
            return xliff
        header = xliff.units[0]
        looks_like_po = ("gettext-domain-header" in (header.getrestype() or "")
                         or xliff.getdatatype() == "po")
        # The class-name test stops PoXliffFile.parsestring from recursing.
        if looks_like_po and cls.__name__.lower() != "poxlifffile":
            # Imported here because poxliff is not imported at module level.
            # NOTE(review): module path assumed from this file's other
            # translate.storage imports; confirm.
            from translate.storage import poxliff
            xliff = poxliff.PoXliffFile.parsestring(storestring)
        return xliff