cvs2svn_lib/metadata_database.py

   1 # (Be in -*- python -*- mode.)
   2 #
   3 # ====================================================================
   4 # Copyright (c) 2000-2009 CollabNet.  All rights reserved.
   5 #
   6 # This software is licensed as described in the file COPYING, which
   7 # you should have received as part of this distribution.  The terms
   8 # are also available at http://subversion.tigris.org/license-1.html.
   9 # If newer versions of this license are posted there, you may use a
  10 # newer version instead, at your option.
  11 #
  12 # This software consists of voluntary contributions made by many
  13 # individuals.  For exact contribution history, see the revision
  14 # history and logs, available at http://cvs2svn.tigris.org/.
  15 # ====================================================================
  16
  17 """This module contains classes to manage CVSRevision metadata."""
  18
  19
  20 try:
  21   from hashlib import sha1
  22 except ImportError:
  23   from sha import new as sha1
  24
  25 from cvs2svn_lib.context import Ctx
  26 from cvs2svn_lib.indexed_database import IndexedDatabase
  27 from cvs2svn_lib.key_generator import KeyGenerator
  28 from cvs2svn_lib.serializer import PrimedPickleSerializer
  29 from cvs2svn_lib.metadata import Metadata
  30
  31
  32 def MetadataDatabase(store_filename, index_table_filename, mode):
  33   """A database to store Metadata instances that describe CVSRevisions.
  34
  35   This database manages a map
  36
  37       id -> Metadata instance
  38
  39   where id is a unique identifier for the metadata."""
  40
  41   return IndexedDatabase(
  42       store_filename, index_table_filename,
  43       mode, PrimedPickleSerializer((Metadata,)),
  44       )
  45
  46
  47 class MetadataLogger:
  48   """Store and generate IDs for the metadata associated with CVSRevisions.
  49
  50   We want CVSRevisions that might be able to be combined to have the
  51   same metadata ID, so we want a one-to-one relationship id <->
  52   metadata.  We could simply construct a map {metadata : id}, but the
  53   map would grow too large.  Therefore, we generate a digest
  54   containing the significant parts of the metadata, and construct a
  55   map {digest : id}.
  56
  57   To get the ID for a new set of metadata, we first create the digest.
  58   If there is already an ID registered for that digest, we simply
  59   return it.  If not, we generate a new ID, store the metadata in the
  60   metadata database under that ID, record the mapping {digest : id},
  61   and return the new id.
  62
  63   What metadata is included in the digest?  The author, log_msg,
  64   project_id (if Ctx().cross_project_commits is not set), and
  65   branch_name (if Ctx().cross_branch_commits is not set)."""
  66
  67   def __init__(self, metadata_db):
  68     self._metadata_db = metadata_db
  69
  70     # A map { digest : id }:
  71     self._digest_to_id = {}
  72
  73     # A key_generator to generate keys for metadata that haven't been
  74     # seen yet:
  75     self.key_generator = KeyGenerator()
  76
  77   def store(self, project, branch_name, author, log_msg):
  78     """Store the metadata and return its id.
  79
  80     Locate the record for a commit with the specified (PROJECT,
  81     BRANCH_NAME, AUTHOR, LOG_MSG) and return its id.  (Depending on
  82     policy, not all of these items are necessarily used when creating
  83     the unique id.)  If there is no such record, create one and return
  84     its newly-generated id."""
  85
  86     key = [author, log_msg]
  87     if not Ctx().cross_project_commits:
  88       key.append('%x' % project.id)
  89     if not Ctx().cross_branch_commits:
  90       key.append(branch_name or '')
  91
  92     digest = sha1('\0'.join(key)).digest()
  93     try:
  94       # See if it is already known:
  95       return self._digest_to_id[digest]
  96     except KeyError:
  97       id = self.key_generator.gen_id()
  98       self._digest_to_id[digest] = id
  99       self._metadata_db[id] = Metadata(id, author, log_msg)
 100       return id
 101
 102