pootlefile.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3 #
   4 # Copyright 2004-2006 Zuza Software Foundation
   5 #
   6 # This file is part of translate.
   7 #
   8 # translate is free software; you can redistribute it and/or modify
   9 # it under the terms of the GNU General Public License as published by
  10 # the Free Software Foundation; either version 2 of the License, or
  11 # (at your option) any later version.
  12 #
  13 # translate is distributed in the hope that it will be useful,
  14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 # GNU General Public License for more details.
  17 #
  18 # You should have received a copy of the GNU General Public License
  19 # along with translate; if not, write to the Free Software
  20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  21
  22 """manages a translation file and its associated files"""
  23
  24 from translate.storage import base
  25 from translate.storage import po
  26 from translate.storage.poheader import tzstring
  27 from translate.storage import xliff
  28 from translate.storage import factory
  29 from translate.filters import checks
  30 from translate.misc.multistring import multistring
  31 from Pootle import __version__
  32 from Pootle import statistics
  33 from jToolkit import timecache
  34 from jToolkit import glock
  35 import time
  36 import os
  37
  38 _UNIT_CHECKER = checks.UnitChecker()
  39
  40 class LockedFile:
  41   """locked interaction with a filesystem file"""
  42   #Locking is disabled for now since it impacts performance negatively and was
  43   #not complete yet anyway. Reverse svn revision 5271 to regain the locking
  44   #code here.
  45   def __init__(self, filename):
  46     self.filename = filename
  47     self.lock = None
  48
  49   def initlock(self):
  50     self.lock = glock.GlobalLock(self.filename + os.extsep + "lock")
  51
  52   def dellock(self):
  53     del self.lock
  54     self.lock = None
  55
  56   def readmodtime(self):
  57     """returns the modification time of the file (locked operation)"""
  58     return statistics.getmodtime(self.filename)
  59
  60   def getcontents(self):
  61     """returns modtime, contents tuple (locked operation)"""
  62     pomtime = statistics.getmodtime(self.filename)
  63     fp = open(self.filename, 'r')
  64     filecontents = fp.read()
  65     fp.close()
  66     return pomtime, filecontents
  67
  68   def writecontents(self, contents):
  69     """writes contents to file, returning modification time (locked operation)"""
  70     f = open(self.filename, 'w')
  71     f.write(contents)
  72     f.close()
  73     pomtime = statistics.getmodtime(self.filename)
  74     return pomtime
  75
  76 class pootleassigns:
  77   """this represents the assignments for a file"""
  78   def __init__(self, basefile):
  79     """constructs assignments object for the given file"""
  80     # TODO: try and remove circular references between basefile and this class
  81     self.basefile = basefile
  82     self.assignsfilename = self.basefile.filename + os.extsep + "assigns"
  83     self.getassigns()
  84
  85   def getassigns(self):
  86     """reads the assigns if neccessary or returns them from the cache"""
  87     if os.path.exists(self.assignsfilename):
  88       self.assigns = self.readassigns()
  89     else:
  90       self.assigns = {}
  91     return self.assigns
  92
  def readassigns(self):
    """Reads the assigns from the associated assigns file, returning them.

    The format is a number of lines consisting of
        username: action: itemranges
    where itemranges is a comma-separated list of item numbers or inclusive
    item ranges like 3-5, e.g.  pootlewizz: review: 2-99,101

    Returns a dictionary {username: {action: [item, ...]}} whose usernames
    and actions are decoded from UTF-8. Malformed lines, ranges and item
    numbers are reported on stdout and skipped. If the modification time of
    the file is the one recorded by the previous successful read, the assigns
    already held in self.assigns are returned without re-parsing (never None).
    """
    assignsmtime = statistics.getmodtime(self.assignsfilename)
    cached = getattr(self, "assigns", None)
    if cached is not None and assignsmtime == getattr(self, "assignsmtime", None):
      return cached
    assignsfile = open(self.assignsfilename, "r")
    try:
      assignsstring = assignsfile.read()
    finally:
      assignsfile.close()
    poassigns = {}
    # NOTE(review): nothing in this class sets self.stats, so itemcount looks
    # like it is always 0 and the range filter below never applies - confirm
    itemcount = len(getattr(self, "stats", {}).get("total", []))
    for line in assignsstring.split("\n"):
      if not line.strip():
        continue
      if line.count(":") != 2:
        print("invalid assigns line in %s: %r" % (self.assignsfilename, line))
        continue
      username, action, itemranges = line.split(":", 2)
      username, action = username.strip().decode('utf-8'), action.strip().decode('utf-8')
      userassigns = poassigns.setdefault(username, {})
      items = userassigns.setdefault(action, [])
      for itemrange in itemranges.split(","):
        try:
          if "-" in itemrange:
            if itemrange.count("-") != 1:
              print("invalid assigns range in %s: %r (from line %r)" % (self.assignsfilename, itemrange, line))
              continue
            itemstart, itemstop = [int(part.strip()) for part in itemrange.split("-", 1)]
            items.extend(range(itemstart, itemstop+1))
          else:
            items.append(int(itemrange.strip()))
        except ValueError:
          # a non-numeric entry is reported like any other malformed range
          print("invalid assigns range in %s: %r (from line %r)" % (self.assignsfilename, itemrange, line))
          continue
      if itemcount:
        items = [item for item in items if 0 <= item < itemcount]
      userassigns[action] = items
    # only remember the modification time once the file parsed completely
    self.assignsmtime = assignsmtime
    return poassigns
 135
 136   def assignto(self, item, username, action):
 137     """assigns the item to the given username for the given action"""
 138     userassigns = self.assigns.setdefault(username, {})
 139     items = userassigns.setdefault(action, [])
 140     if item not in items:
 141       items.append(item)
 142     self.saveassigns()
 143
 144   def unassign(self, item, username=None, action=None):
 145     """removes assignments of the item to the given username (or all users) for the given action (or all actions)"""
 146     if username is None:
 147       usernames = self.assigns.keys()
 148     else:
 149       usernames = [username]
 150     for username in usernames:
 151       userassigns = self.assigns.setdefault(username, {})
 152       if action is None:
 153         itemlist = [userassigns.get(action, []) for action in userassigns]
 154       else:
 155         itemlist = [userassigns.get(action, [])]
 156       for items in itemlist:
 157         if item in items:
 158           items.remove(item)
 159     self.saveassigns()
 160
 161   def saveassigns(self):
 162     """saves the current assigns to file"""
 163     # assumes self.assigns is up to date
 164     assignstrings = []
 165     usernames = self.assigns.keys()
 166     usernames.sort()
 167     for username in usernames:
 168       actions = self.assigns[username].keys()
 169       actions.sort()
 170       for action in actions:
 171         items = self.assigns[username][action]
 172         items.sort()
 173         if items:
 174           lastitem = None
 175           rangestart = None
 176           assignstring = "%s: %s: " % (username.encode('utf-8'), action.encode('utf-8'))
 177           for item in items:
 178             if item - 1 == lastitem:
 179               if rangestart is None:
 180                 rangestart = lastitem
 181             else:
 182               if rangestart is not None:
 183                 assignstring += "-%d" % lastitem
 184                 rangestart = None
 185               if lastitem is None:
 186                 assignstring += "%d" % item
 187               else:
 188                 assignstring += ",%d" % item
 189             lastitem = item
 190           if rangestart is not None:
 191             assignstring += "-%d" % lastitem
 192           assignstrings.append(assignstring + "\n")
 193     assignsfile = open(self.assignsfilename, "w")
 194     assignsfile.writelines(assignstrings)
 195     assignsfile.close()
 196
 197   def getunassigned(self, action=None):
 198     """gets all strings that are unassigned (for the given action if given)"""
 199     unassigneditems = range(0, self.basefile.statistics.getitemslen())
 200     self.assigns = self.getassigns()
 201     for username in self.assigns:
 202       if action is not None:
 203         assigneditems = self.assigns[username].get(action, [])
 204       else:
 205         assigneditems = []
 206         for action, actionitems in self.assigns[username].iteritems():
 207           assigneditems += actionitems
 208       unassigneditems = [item for item in unassigneditems if item not in assigneditems]
 209     return unassigneditems
 210
 211   def finditems(self, search):
 212     """returns items that match the .assignedto and/or .assignedaction criteria in the searchobject"""
 213     # search.assignedto == [None] means assigned to nobody
 214     if search.assignedto == [None]:
 215       assignitems = self.getunassigned(search.assignedaction)
 216     else:
 217       # filter based on assign criteria
 218       assigns = self.getassigns()
 219       if search.assignedto:
 220         usernames = [search.assignedto]
 221       else:
 222         usernames = assigns.iterkeys()
 223       assignitems = []
 224       for username in usernames:
 225         if search.assignedaction:
 226           actionitems = assigns[username].get(search.assignedaction, [])
 227           assignitems.extend(actionitems)
 228         else:
 229           for actionitems in assigns[username].itervalues():
 230             assignitems.extend(actionitems)
 231     return assignitems
 232
 233 class pootlebase(object):
 234   pass
 235
 236 class pootlefile(pootlebase):
 237   """this represents a pootle-managed file and its associated files"""
 238   x_generator = "Pootle %s" % __version__.ver
 239   def __init__(self, project=None, pofilename=None):
 240     if pofilename:
 241       self.__class__.__bases__ = (factory.getclass(pofilename),)
 242     super(pootlefile, self).__init__()
 243     self.pofilename = pofilename
 244     if project is None:
 245       from Pootle import projects
 246       self.project = projects.DummyProject(None)
 247       self.checker = None
 248       self.filename = self.pofilename
 249     else:
 250       self.project = project
 251       self.checker = self.project.checker
 252       self.filename = os.path.join(self.project.podir, self.pofilename)
 253
 254     self.lockedfile = LockedFile(self.filename)
 255     # we delay parsing until it is required
 256     self.pomtime = None
 257     self.assigns = None
 258
 259     self.pendingfilename = self.filename + os.extsep + "pending"
 260     self.pendingfile = None
 261     self.statistics = statistics.pootlestatistics(self)
 262     self.tmfilename = self.filename + os.extsep + "tm"
 263     # we delay parsing until it is required
 264     self.pomtime = None
 265     self.tracker = timecache.timecache(20*60)
 266
 267   def parsestring(cls, storestring):
 268     newstore = cls()
 269     newstore.parse(storestring)
 270     return newstore
 271   parsestring = classmethod(parsestring)
 272
 273   def parsefile(cls, storefile):
 274     """Reads the given file (or opens the given filename) and parses back to an object"""
 275     if isinstance(storefile, basestring):
 276         storefile = open(storefile, "r")
 277     if "r" in getattr(storefile, "mode", "r"):
 278       storestring = storefile.read()
 279     else:
 280       storestring = ""
 281     return cls.parsestring(storestring)
 282   parsefile = classmethod(parsefile)
 283
 284   def getheaderplural(self):
 285     """returns values for nplural and plural values.  It tries to see if the
 286     file has it specified (in a po header or similar)."""
 287     try:
 288       return super(pootlefile, self).getheaderplural()
 289     except AttributeError:
 290       return None, None
 291
 292   def updateheaderplural(self, *args, **kwargs):
 293     """updates the file header. If there is an updateheader function in the
 294     underlying store it will be delegated there."""
 295     try:
 296       super(pootlefile, self).updateheaderplural(*args, **kwargs)
 297     except AttributeError:
 298       pass
 299
 300   def updateheader(self, **kwargs):
 301     """updates the file header. If there is an updateheader function in the
 302     underlying store it will be delegated there."""
 303     try:
 304       super(pootlefile, self).updateheader(**kwargs)
 305     except AttributeError:
 306       pass
 307
 308   def readpendingfile(self):
 309     """reads and parses the pending file corresponding to this file"""
 310     if os.path.exists(self.pendingfilename):
 311       inputfile = open(self.pendingfilename, "r")
 312       self.pendingfile = factory.getobject(inputfile, ignore=".pending")
 313     else:
 314       self.pendingfile = po.pofile()
 315
 316   def savependingfile(self):
 317     """saves changes to disk..."""
 318     output = str(self.pendingfile)
 319     outputfile = open(self.pendingfilename, "w")
 320     outputfile.write(output)
 321     outputfile.close()
 322
 323   def readtmfile(self):
 324     """reads and parses the tm file corresponding to this file"""
 325     if os.path.exists(self.tmfilename):
 326       tmmtime = statistics.getmodtime(self.tmfilename)
 327       if tmmtime == getattr(self, "tmmtime", None):
 328         return
 329       inputfile = open(self.tmfilename, "r")
 330       self.tmmtime, self.tmfile = tmmtime, factory.getobject(inputfile, ignore=".tm")
 331     else:
 332       self.tmfile = po.pofile()
 333
 334   def getsuggestions(self, item):
 335     """find all the suggestion items submitted for the given item"""
 336     unit = self.getitem(item)
 337     if isinstance(unit, xliff.xliffunit):
 338       return unit.getalttrans()
 339
 340     locations = unit.getlocations()
 341     self.readpendingfile()
 342     # TODO: review the matching method
 343     suggestpos = [suggestpo for suggestpo in self.pendingfile.units if suggestpo.getlocations() == locations]
 344     return suggestpos
 345
 346   def addsuggestion(self, item, suggtarget, username):
 347     """adds a new suggestion for the given item"""
 348     unit = self.getitem(item)
 349     if isinstance(unit, xliff.xliffunit):
 350       if isinstance(suggtarget, list) and (len(suggtarget) > 0):
 351         suggtarget = suggtarget[0]
 352       unit.addalttrans(suggtarget, origin=username)
 353       self.statistics.reclassifyunit(item)
 354       self.savepofile()
 355       return
 356
 357     self.readpendingfile()
 358     newpo = unit.copy()
 359     if username is not None:
 360       newpo.msgidcomments.append('"_: suggested by %s\\n"' % username)
 361     newpo.target = suggtarget
 362     newpo.markfuzzy(False)
 363     self.pendingfile.addunit(newpo)
 364     self.savependingfile()
 365     self.statistics.reclassifyunit(item)
 366
 367   def deletesuggestion(self, item, suggitem):
 368     """removes the suggestion from the pending file"""
 369     unit = self.getitem(item)
 370     if hasattr(unit, "xmlelement"):
 371       suggestions = self.getsuggestions(item)
 372       unit.delalttrans(suggestions[suggitem])
 373       self.savepofile()
 374     else:
 375       self.readpendingfile()
 376       locations = unit.getlocations()
 377       # TODO: remove the suggestion in a less brutal manner
 378       pendingitems = [pendingitem for pendingitem, suggestpo in enumerate(self.pendingfile.units) if suggestpo.getlocations() == locations]
 379       pendingitem = pendingitems[suggitem]
 380       del self.pendingfile.units[pendingitem]
 381       self.savependingfile()
 382     self.statistics.reclassifyunit(item)
 383
 384   def getsuggester(self, item, suggitem):
 385     """returns who suggested the given item's suggitem if recorded, else None"""
 386     unit = self.getsuggestions(item)[suggitem]
 387     if hasattr(unit, "xmlelement"):
 388       return unit.xmlelement.getAttribute("origin")
 389
 390     for msgidcomment in unit.msgidcomments:
 391       if msgidcomment.find("suggested by ") != -1:
 392         suggestedby = po.unquotefrompo([msgidcomment]).replace("_:", "", 1).replace("suggested by ", "", 1).strip()
 393         return suggestedby
 394     return None
 395
 396   def gettmsuggestions(self, item):
 397     """find all the tmsuggestion items submitted for the given item"""
 398     self.readtmfile()
 399     unit = self.getitem(item)
 400     locations = unit.getlocations()
 401     # TODO: review the matching method
 402     # Can't simply use the location index, because we want multiple matches
 403     suggestpos = [suggestpo for suggestpo in self.tmfile.units if suggestpo.getlocations() == locations]
 404     return suggestpos
 405
 406   def track(self, item, message):
 407     """sets the tracker message for the given item"""
 408     self.tracker[item] = message
 409
 410   def readpofile(self):
 411     """reads and parses the main file"""
 412     # make sure encoding is reset so it is read from the file
 413     self.encoding = None
 414     self.units = []
 415     pomtime, filecontents = self.lockedfile.getcontents()
 416     # note: we rely on this not resetting the filename, which we set earlier, when given a string
 417     self.parse(filecontents)
 418     self.pomtime = pomtime
 419
 420   def savepofile(self):
 421     """saves changes to the main file to disk..."""
 422     output = str(self)
 423     self.pomtime = self.lockedfile.writecontents(output)
 424
 425   def pofreshen(self):
 426     """makes sure we have a freshly parsed pofile"""
 427     try:
 428         if self.pomtime != self.lockedfile.readmodtime():
 429           self.readpofile()
 430     except OSError, e:
 431         # If this exception is not triggered by a bad
 432         # symlink, then we have a missing file on our hands...
 433         if not os.path.islink(self.filename):
 434             # ...and thus we rescan our files to get rid of the missing filename
 435             self.project.scanpofiles()
 436         else:
 437             print "%s is a broken symlink" % (self.filename,)
 438
 439   def getoutput(self):
 440     """returns pofile output"""
 441     self.pofreshen()
 442     return super(pootlefile, self).getoutput()
 443
 444   def updateunit(self, item, newvalues, userprefs, languageprefs):
 445     """updates a translation with a new target value"""
 446     self.pofreshen()
 447     unit = self.getitem(item)
 448
 449     if newvalues.has_key("target"):
 450       unit.target = newvalues["target"]
 451     if newvalues.has_key("fuzzy"):
 452       unit.markfuzzy(newvalues["fuzzy"])
 453     if newvalues.has_key("translator_comments"):
 454       unit.removenotes()
 455       if newvalues["translator_comments"]:
 456         unit.addnote(newvalues["translator_comments"])
 457
 458     po_revision_date = time.strftime("%Y-%m-%d %H:%M") + tzstring()
 459     headerupdates = {"PO_Revision_Date": po_revision_date, "X_Generator": self.x_generator}
 460     if userprefs:
 461       if getattr(userprefs, "name", None) and getattr(userprefs, "email", None):
 462         headerupdates["Last_Translator"] = "%s <%s>" % (userprefs.name, userprefs.email)
 463     # XXX: If we needed to add a header, the index value in item will be one out after
 464     # adding the header.
 465     # TODO: remove once we force the PO class to always output headers
 466     force_recache = False
 467     if not self.header():
 468       force_recache = True
 469     self.updateheader(add=True, **headerupdates)
 470     if languageprefs:
 471       nplurals = getattr(languageprefs, "nplurals", None)
 472       pluralequation = getattr(languageprefs, "pluralequation", None)
 473       if nplurals and pluralequation:
 474         self.updateheaderplural(nplurals, pluralequation)
 475     self.savepofile()
 476     if force_recache:
 477       self.statistics.purge_totals()
 478     self.statistics.reclassifyunit(item)
 479
 480   def getitem(self, item):
 481     """Returns a single unit based on the item number."""
 482     return self.units[self.statistics.getstats()["total"][item]]
 483
 484   def iteritems(self, search, lastitem=None):
 485     """iterates through the items in this pofile starting after the given lastitem, using the given search"""
 486     # update stats if required
 487     translatables = self.statistics.getstats()["total"]
 488     if lastitem is None:
 489       minitem = 0
 490     else:
 491       minitem = lastitem + 1
 492     maxitem = len(translatables)
 493     validitems = range(minitem, maxitem)
 494     if search.assignedto or search.assignedaction:
 495       assignitems = self.getassigns().finditems(search)
 496       validitems = [item for item in validitems if item in assignitems]
 497     # loop through, filtering on matchnames if required
 498     for item in validitems:
 499       if not search.matchnames:
 500         yield item
 501       for name in search.matchnames:
 502         if translatables[item] in self.statistics.getstats()[name]:
 503           yield item
 504
 505   def matchitems(self, newfile, uselocations=False):
 506     """matches up corresponding items in this pofile with the given newfile, and returns tuples of matching poitems (None if no match found)"""
 507     if not hasattr(self, "sourceindex"):
 508       self.makeindex()
 509     if not hasattr(newfile, "sourceindex"):
 510       newfile.makeindex()
 511     matches = []
 512     for newpo in newfile.units:
 513       if newpo.isheader():
 514         continue
 515       foundid = False
 516       if uselocations:
 517         newlocations = newpo.getlocations()
 518         mergedlocations = []
 519         for location in newlocations:
 520           if location in mergedlocations:
 521             continue
 522           if location in self.locationindex:
 523             oldpo = self.locationindex[location]
 524             if oldpo is not None:
 525               foundid = True
 526               matches.append((oldpo, newpo))
 527               mergedlocations.append(location)
 528               continue
 529       if not foundid:
 530         # We can't use the multistring, because it might contain more than two
 531         # entries in a PO xliff file. Rather use the singular.
 532         source = unicode(newpo.source)
 533         if source in self.sourceindex:
 534           oldpo = self.sourceindex[source]
 535           matches.append((oldpo, newpo))
 536         else:
 537           matches.append((None, newpo))
 538     # find items that have been removed
 539     matcheditems = [oldpo for oldpo, newpo in matches if oldpo]
 540     for oldpo in self.units:
 541       if not oldpo in matcheditems:
 542         matches.append((oldpo, None))
 543     return matches
 544
 545   def getassigns(self):
 546     if self.assigns is None:
 547         self.assigns = pootleassigns(self)
 548     return self.assigns
 549
 550   def mergeitem(self, oldpo, newpo, username, suggest=False):
 551     """merges any changes from newpo into oldpo"""
 552     unchanged = oldpo.target == newpo.target
 553     if not suggest and (not oldpo.target or not newpo.target or oldpo.isheader() or newpo.isheader() or unchanged):
 554       oldpo.merge(newpo)
 555     else:
 556       for item in self.statistics.getstats()["total"]:
 557         matchpo = self.units[item]
 558         if matchpo == oldpo:
 559           strings = getattr(newpo.target, "strings", [newpo.target])
 560           self.addsuggestion(item, strings, username)
 561           return
 562       raise KeyError("Could not find item for merge")
 563
 564   def mergefile(self, newfile, username, allownewstrings=True, suggestions=False):
 565     """make sure each msgid is unique ; merge comments etc from duplicates into original"""
 566     self.makeindex()
 567     matches = self.matchitems(newfile)
 568     for oldpo, newpo in matches:
 569       if suggestions:
 570         if oldpo and newpo:
 571             self.mergeitem(oldpo, newpo, username, suggest=True)
 572         continue
 573
 574       if oldpo is None:
 575         if allownewstrings:
 576           if isinstance(newpo, po.pounit):
 577             self.addunit(newpo)
 578           else:
 579             self.addunit(self.UnitClass.buildfromunit(newpo))
 580       elif newpo is None:
 581         # TODO: mark the old one as obsolete
 582         pass
 583       else:
 584         self.mergeitem(oldpo, newpo, username)
 585         # we invariably want to get the ids (source locations) from the newpo
 586         if hasattr(newpo, "sourcecomments"):
 587           oldpo.sourcecomments = newpo.sourcecomments
 588
 589     if not isinstance(newfile, po.pofile) or suggestions:
 590       #TODO: We don't support updating the header yet.
 591       self.savepofile()
 592       # the easiest way to recalculate everything
 593       self.readpofile()
 594       return
 595
 596     #Let's update selected header entries. Only the ones listed below, and ones
 597     #that are empty in self can be updated. The check in header_order is just
 598     #a basic sanity check so that people don't insert garbage.
 599     updatekeys = ['Content-Type',
 600                   'POT-Creation-Date',
 601                   'Last-Translator',
 602                   'Project-Id-Version',
 603                   'PO-Revision-Date',
 604                   'Language-Team']
 605     headerstoaccept = {}
 606     ownheader = self.parseheader()
 607     for (key, value) in newfile.parseheader().items():
 608       if key in updatekeys or (not key in ownheader or not ownheader[key]) and key in po.pofile.header_order:
 609         headerstoaccept[key] = value
 610     self.updateheader(add=True, **headerstoaccept)
 611
 612     #Now update the comments above the header:
 613     header = self.header()
 614     newheader = newfile.header()
 615     if header is None and not newheader is None:
 616       header = self.UnitClass("", encoding=self.encoding)
 617       header.target = ""
 618     if header:
 619       header._initallcomments(blankall=True)
 620       if newheader:
 621         for i in range(len(header.allcomments)):
 622           header.allcomments[i].extend(newheader.allcomments[i])
 623
 624     self.savepofile()
 625     # the easiest way to recalculate everything
 626     self.readpofile()
 627
 628 class Search:
 629   """an object containing all the searching information"""
 630   def __init__(self, dirfilter=None, matchnames=[], assignedto=None, assignedaction=None, searchtext=None):
 631     self.dirfilter = dirfilter
 632     self.matchnames = matchnames
 633     self.assignedto = assignedto
 634     self.assignedaction = assignedaction
 635     self.searchtext = searchtext
 636
 637   def copy(self):
 638     """returns a copy of this search"""
 639     return Search(self.dirfilter, self.matchnames, self.assignedto, self.assignedaction, self.searchtext)
 640