# coding: utf-8

# Copyright (C) 2010 Oregon State University et al.
# Copyright (C) 2010 Greek Research and Technology Network
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
# USA.

import binascii
import cPickle
from datetime import datetime, timedelta
from hashlib import sha1
import random
import re
import string
import sys
import time

from django.conf import settings

from django.contrib.auth.models import User, Group
from django.contrib.contenttypes.generic import GenericForeignKey
from django.contrib.contenttypes.models import ContentType
from django.contrib.sites import models as sites_app
from django.contrib.sites.management import create_default_site
from django.core.validators import RegexValidator, MinValueValidator
from django.db import models
from django.db.models import BooleanField, Q, Sum
from django.db.models.query import QuerySet
from django.db.models.signals import post_save, post_syncdb
from django.db.utils import DatabaseError
from django.utils.encoding import force_unicode
from django.utils.translation import ugettext_lazy as _

from django_fields.fields import PickleField

from ganeti_web.logs import register_log_actions

from object_log.models import LogItem
log_action = LogItem.objects.log_action

from object_permissions.registration import register

from muddle_users import signals as muddle_user_signals

from ganeti_web import constants, management, permissions
from ganeti_web.fields import (PatchedEncryptedCharField,
                               PreciseDateTimeField, SumIf)
from ganeti_web.util import client
from ganeti_web.util.client import GanetiApiError, REPLACE_DISK_AUTO

from south.signals import post_migrate

if settings.VNC_PROXY:
    from ganeti_web.util.vncdaemon.vapclient import (request_forwarding,
                                                     request_ssh)

class QuerySetManager(models.Manager):
    """
    Useful if you want to define manager methods that need to chain. In this
    case create a QuerySet class within your model and add all of your
    methods directly to the queryset. Example:

    class Foo(models.Model):
        enabled = fields.BooleanField()
        dirty = fields.BooleanField()

        class QuerySet:
            def active(self):
                return self.filter(enabled=True)
            def clean(self):
                return self.filter(dirty=False)

    Foo.objects.active().clean()
    """

    def __getattr__(self, name, *args):
        # Cull under/dunder names to avoid certain kinds of recursion. Django
        # isn't super-bright here.
        if name.startswith('_'):
            raise AttributeError
        return getattr(self.get_query_set(), name, *args)

    def get_query_set(self):
        return self.model.QuerySet(self.model)

def generate_random_password(length=12):
    "Generate random sequence of specified length"
    return "".join(random.sample(string.letters + string.digits, length))

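# Note on the helper above: random.sample() draws without replacement, so a
# generated password never repeats a character, and `length` may not exceed
# len(string.letters + string.digits).
#
#   generate_random_password()   # e.g. 'aT4kZ9mQ1xYw' (hypothetical output)
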
FINISHED_JOBS = 'success', 'unknown', 'error'

RAPI_CACHE = {}
RAPI_CACHE_HASHES = {}

def get_rapi(hash, cluster):
    """
    Retrieves the cached Ganeti RAPI client for a given hash. The hash is
    derived from the connection credentials required for a cluster. If the
    client is not yet cached, it will be created and added.

    If a hash does not correspond to any cluster then Cluster.DoesNotExist
    will be raised.

    @param cluster - either a cluster object, or ID of object. This is used
    for resolving the cluster if the client is not already found. The id is
    used rather than the hash, because the hash is mutable.

    @return a Ganeti RAPI client.
    """
    if hash in RAPI_CACHE:
        return RAPI_CACHE[hash]

    # always look up the instance, even if we were given a Cluster instance;
    # it ensures we are retrieving the latest credentials. This helps avoid
    # stale credentials. Retrieve only the values because we don't actually
    # need another Cluster instance here.
    if isinstance(cluster, (Cluster,)):
        cluster = cluster.id
    (credentials,) = Cluster.objects.filter(id=cluster) \
        .values_list('hash', 'hostname', 'port', 'username', 'password')
    hash, host, port, user, password = credentials
    user = user or None
    # decrypt password
    # XXX django-fields only stores str, convert to None if needed
    password = Cluster.decrypt_password(password) if password else None
    password = None if password in ('None', '') else password

    # now that we know hash is fresh, check cache again. The original hash
    # could have been stale. This avoids constructing a new RAPI that already
    # exists.
    if hash in RAPI_CACHE:
        return RAPI_CACHE[hash]

    # delete any old version of the client that was cached.
    if cluster in RAPI_CACHE_HASHES:
        del RAPI_CACHE[RAPI_CACHE_HASHES[cluster]]

    # Set connect timeout in settings.py so that you do not learn patience.
    rapi = client.GanetiRapiClient(host, port, user, password,
                                   timeout=settings.RAPI_CONNECT_TIMEOUT)
    RAPI_CACHE[hash] = rapi
    RAPI_CACHE_HASHES[cluster] = hash
    return rapi


def clear_rapi_cache():
    """
    clears the rapi cache
    """
    RAPI_CACHE.clear()
    RAPI_CACHE_HASHES.clear()

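# Usage sketch for the cache above (assuming a saved Cluster with pk=1):
# the first call builds a GanetiRapiClient and caches it under the
# cluster's credential hash; the second call is a pure cache hit.
#
#   cluster = Cluster.objects.get(pk=1)
#   rapi = get_rapi(cluster.hash, cluster)
#   assert get_rapi(cluster.hash, cluster.id) is rapi
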
ssh_public_key_re = re.compile(
    r'^ssh-(rsa|dsa|dss) [A-Z0-9+/=]+ .+$', re.IGNORECASE)
ssh_public_key_error = _("Enter a valid RSA or DSA SSH key.")
validate_sshkey = RegexValidator(ssh_public_key_re, ssh_public_key_error,
                                 "invalid")

class CachedClusterObject(models.Model):
    """
    Parent class for objects which belong to Ganeti but have cached data in
    GWM.

    The main point of this class is to permit saving lots of data from Ganeti
    so that we don't have to look things up constantly. The Ganeti RAPI is
    slow, so avoiding it as much as possible is a good idea.

    This class provides transparent caching for all of the data that it
    serializes; no explicit cache accesses are required.

    This model is abstract and may not be instantiated on its own.
    """

    serialized_info = models.TextField(default="", editable=False)
    mtime = PreciseDateTimeField(null=True, editable=False)
    cached = PreciseDateTimeField(null=True, editable=False)
    ignore_cache = models.BooleanField(default=False)

    last_job_id = None
    __info = None
    error = None
    ctime = None
    deleted = False

    class Meta:
        abstract = True

    def save(self, *args, **kwargs):
        """
        overridden to ensure info is serialized prior to save
        """
        if not self.serialized_info:
            self.serialized_info = cPickle.dumps(self.__info)
        super(CachedClusterObject, self).save(*args, **kwargs)

    def __init__(self, *args, **kwargs):
        super(CachedClusterObject, self).__init__(*args, **kwargs)
        self.load_info()

    @property
    def info(self):
        """
        A dictionary of metadata for this object.

        This is a proxy for the ``serialized_info`` field. Reads from this
        property lazily access the field, and writes to this property will be
        lazily saved.

        Writes to this property do *not* force serialization.
        """
        if self.__info is None:
            if self.serialized_info:
                self.__info = cPickle.loads(str(self.serialized_info))
        return self.__info

    def _set_info(self, value):
        self.__info = value
        if value is not None:
            self.parse_info()
            self.serialized_info = ""

    info = info.setter(_set_info)

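    # Behavior sketch for the property above: assigning to `info` runs
    # parse_info() immediately and blanks serialized_info, so the next save()
    # re-pickles the new data; reading `info` unpickles lazily, at most once.
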
    def load_info(self):
        """
        Load cached info retrieved from the ganeti cluster. This function
        includes a lazy cache mechanism that uses a timer to decide whether
        or not to refresh the cached information with new information from
        the ganeti cluster.

        This will ignore the cache when self.ignore_cache is True
        """
        epsilon = timedelta(0, 0, 0, settings.LAZY_CACHE_REFRESH)

        if self.id:
            if (self.ignore_cache
                    or self.cached is None
                    or datetime.now() > self.cached + epsilon):
                self.refresh()
            elif self.info:
                self.parse_transient_info()
            else:
                self.error = 'No Cached Info'

    def parse_info(self):
        """
        Parse all of the attached metadata, and attach it to this object.
        """
        self.parse_transient_info()
        data = self.parse_persistent_info(self.info)
        for k in data:
            setattr(self, k, data[k])

    def refresh(self):
        """
        Retrieve and parse info from the ganeti cluster. If successfully
        retrieved and parsed, this method will also call save().

        If communication with Ganeti fails, an error will be stored in
        ``error``.
        """
        job_data = self.check_job_status()
        for k, v in job_data.items():
            setattr(self, k, v)

        # XXX this try/except is far too big; see if we can pare it down.
        try:
            info_ = self._refresh()
            if info_:
                if info_['mtime']:
                    mtime = datetime.fromtimestamp(info_['mtime'])
                else:
                    mtime = None
                self.cached = datetime.now()
            else:
                # no info retrieved, use current mtime
                mtime = self.mtime

            if self.id and (self.mtime is None or mtime > self.mtime):
                # there was an update. Set info and save the object
                self.info = info_
                self.save()
            else:
                # There was no change on the server. Only update the cache
                # time. This bypasses the info serialization mechanism and
                # uses a smaller query.
                if job_data:
                    self.__class__.objects.filter(pk=self.id) \
                        .update(cached=self.cached, **job_data)
                elif self.id is not None:
                    self.__class__.objects.filter(pk=self.id) \
                        .update(cached=self.cached)

        except GanetiApiError, e:
            # Use regular expressions to match the quoted message
            # given by GanetiApiError. '\\1' is a group substitution
            # which places the first group '('|\")' in its place.
            comp = re.compile("('|\")(?P<msg>.*)\\1")
            err = comp.search(str(e))
            # Any search that has 0 results will just return None.
            # That is why we must check for err before proceeding.
            if err:
                msg = err.groupdict()['msg']
                self.error = msg
            else:
                msg = str(e)
                self.error = str(e)
            GanetiError.store_error(msg, obj=self, code=e.code)

        else:
            if self.error:
                self.error = None
                GanetiError.objects.clear_errors(obj=self)

    def _refresh(self):
        """
        Fetch raw data from the Ganeti cluster.

        This must be implemented by children of this class.
        """
        raise NotImplementedError

    def check_job_status(self):
        if not self.last_job_id:
            return {}

        ct = ContentType.objects.get_for_model(self)
        qs = Job.objects.filter(content_type=ct, object_id=self.pk)
        jobs = qs.order_by("job_id")

        updates = {}
        for job in jobs:
            status = 'unknown'
            op = None

            try:
                data = self.rapi.GetJobStatus(job.job_id)
                status = data['status']
                op = data['ops'][-1]['OP_ID']
            except GanetiApiError:
                pass

            if status in ('success', 'error'):
                for k, v in Job.parse_persistent_info(data).items():
                    setattr(job, k, v)

            if status == 'unknown':
                job.status = "unknown"
                job.ignore_cache = False

            if status in ('success', 'error', 'unknown'):
                _updates = self._complete_job(self.cluster_id,
                                              self.hostname, op, status)
                # XXX if the delete flag is set in updates then delete this
                # model. This happens here because _complete_job cannot
                # delete this model.
                if _updates:
                    if 'deleted' in _updates:
                        # Delete ourselves. Also delete the job that caused
                        # us to delete ourselves; see #8439 for "fun"
                        # details. Order matters; the job's deletion cascades
                        # over us. Revisit that when we finally nuke all this
                        # caching bullshit.
                        self.delete()
                        job.delete()
                    else:
                        updates.update(_updates)

        # we only care about the very last job for resetting the cache flags.
        # check `not jobs` first so `status` is only read when the loop above
        # actually bound it.
        if not jobs or status in ('success', 'error', 'unknown'):
            updates['ignore_cache'] = False
            updates['last_job'] = None

        return updates

    @classmethod
    def _complete_job(cls, cluster_id, hostname, op, status):
        """
        Process a completed job. This method will make any updates to related
        classes (like deleting an instance template) and return any data that
        should be updated. This is a class method so that this processing can
        be done without a full instance.

        @returns dict of updated values
        """
        pass

    def parse_transient_info(self):
        """
        Parse properties from cached info that is stored on the class but not
        in the database.

        These properties will be loaded every time the object is
        instantiated. Properties stored on the class cannot be searched
        efficiently via the django query api.

        This method is specific to the child object.
        """
        info_ = self.info
        # XXX ganeti 2.1 ctime is always None
        # XXX this means that we could nuke the conditionals!
        if info_['ctime'] is not None:
            self.ctime = datetime.fromtimestamp(info_['ctime'])

    @classmethod
    def parse_persistent_info(cls, info):
        """
        Parse properties from cached info that are stored in the database.

        These properties will be searchable by the django query api.

        This method is specific to the child object.
        """
        # mtime is sometimes None if object has never been modified
        if info['mtime'] is None:
            return {'mtime': None}
        return {'mtime': datetime.fromtimestamp(info['mtime'])}

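# Lifecycle sketch for CachedClusterObject subclasses: __init__ calls
# load_info(), which hits the RAPI only when ignore_cache is set or the
# row's `cached` timestamp is older than settings.LAZY_CACHE_REFRESH
# (milliseconds; it is passed as timedelta's fourth positional argument).
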
class JobManager(models.Manager):
    """
    Custom manager for Ganeti Jobs model
    """
    def create(self, **kwargs):
        """ helper method for creating a job with disabled cache """
        job = Job(ignore_cache=True, **kwargs)
        job.save(force_insert=True)
        return job

class Job(CachedClusterObject):
    """
    model representing a job being run on a ganeti Cluster. This includes
    operations such as creating or deleting a virtual machine.

    Jobs are a special type of CachedClusterObject. Jobs run once then become
    immutable. The lazy cache is modified to become permanent once a complete
    status (success/error) has been detected. The cache can be disabled by
    setting ignore_cache=True.
    """

    job_id = models.IntegerField()
    content_type = models.ForeignKey(ContentType, related_name="+")
    object_id = models.IntegerField()
    obj = GenericForeignKey('content_type', 'object_id')
    cluster = models.ForeignKey('Cluster', related_name='jobs', editable=False)
    cluster_hash = models.CharField(max_length=40, editable=False)

    finished = models.DateTimeField(null=True, blank=True)
    status = models.CharField(max_length=10)
    op = models.CharField(max_length=50)

    objects = JobManager()

    def save(self, *args, **kwargs):
        """
        sets the cluster_hash for newly saved instances
        """
        if self.id is None or self.cluster_hash == '':
            self.cluster_hash = self.cluster.hash

        super(Job, self).save(*args, **kwargs)

    @models.permalink
    def get_absolute_url(self):
        job = '%s/job/(?P<job_id>\d+)' % self.cluster

        return 'ganeti_web.views.jobs.detail', (), {'job': job}

    @property
    def rapi(self):
        return get_rapi(self.cluster_hash, self.cluster_id)

    def _refresh(self):
        return self.rapi.GetJobStatus(self.job_id)

    def load_info(self):
        """
        Load info for class. This will load from ganeti if
        ignore_cache==True, otherwise this will always load from the cache.
        """
        if self.id and (self.ignore_cache or self.info is None):
            try:
                self.refresh()
            except GanetiApiError, e:
                # if the Job has been archived then we don't know whether it
                # was successful or not. Mark it as unknown.
                if e.code == 404:
                    self.status = 'unknown'
                    self.save()
                else:
                    # it's possible the cluster or credentials are bad. fail
                    # silently
                    pass

    def refresh(self):
        self.info = self._refresh()
        self.save()

    @classmethod
    def parse_persistent_info(cls, info):
        """
        Parse status and turn off cache bypass flag if job has finished
        """
        data = {'status': info['status'],
                'op': info['ops'][-1]['OP_ID']}
        if data['status'] in ('error', 'success'):
            data['ignore_cache'] = False
            if info['end_ts']:
                data['finished'] = cls.parse_end_timestamp(info)
        return data

    @staticmethod
    def parse_end_timestamp(info):
        sec, micro = info['end_ts']
        return datetime.fromtimestamp(sec + (micro / 1000000.0))

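    # Illustrative data for the helper above: Ganeti reports end_ts as a
    # (seconds, microseconds) pair, so {'end_ts': [1325376000, 500000]}
    # parses to the datetime half a second past that epoch timestamp.
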
    def parse_transient_info(self):
        pass

    @property
    def current_operation(self):
        """
        Jobs may consist of multiple commands/operations. This helper
        method will return the operation that is currently running or errored
        out, or the last operation if all operations have completed

        @returns raw name of the current operation
        """
        info = self.info
        index = 0
        for i in range(len(info['opstatus'])):
            if info['opstatus'][i] != 'success':
                index = i
                break
        return info['ops'][index]['OP_ID']

    @property
    def operation(self):
        """
        Returns the last operation, which is generally the primary operation.
        """
        return self.info['ops'][-1]['OP_ID']

    def __repr__(self):
        return "<Job %d (%d), status %r>" % (self.id, self.job_id,
                                             self.status)

    __unicode__ = __repr__

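# Usage sketch for Job (hypothetical vm object and RAPI job id):
#
#   job = Job.objects.create(job_id=1234, obj=vm, cluster_id=vm.cluster_id)
#   job.current_operation   # first op not yet 'success'
#   job.operation           # last op submitted, e.g. 'OP_INSTANCE_REBOOT'
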
class VirtualMachine(CachedClusterObject):
    """
    The VirtualMachine (VM) model represents VMs within a Ganeti cluster.

    The majority of properties are a cache for data stored in the cluster.
    All data retrieved via the RAPI is stored in VirtualMachine.info, and
    serialized automatically into VirtualMachine.serialized_info.

    Attributes that need to be searchable should be stored as model fields.
    All other attributes will be stored within VirtualMachine.info.

    This object uses a lazy update mechanism on instantiation. If the cached
    info from the Ganeti cluster has expired, it will trigger an update. This
    allows the cache to function in the absence of a periodic update
    mechanism such as Cron, Celery, or Threads.

    XXX Serialized_info can possibly be changed to a CharField if an upper
    limit can be determined. (Later Date, if it will optimize db)
    """

    cluster = models.ForeignKey('Cluster', related_name='virtual_machines',
                                editable=False, default=0)
    hostname = models.CharField(max_length=128, db_index=True)
    owner = models.ForeignKey('ClusterUser', related_name='virtual_machines',
                              null=True, blank=True,
                              on_delete=models.SET_NULL)
    virtual_cpus = models.IntegerField(default=-1)
    disk_size = models.IntegerField(default=-1)
    ram = models.IntegerField(default=-1)
    minram = models.IntegerField(default=-1)
    cluster_hash = models.CharField(max_length=40, editable=False)
    operating_system = models.CharField(max_length=128)
    status = models.CharField(max_length=14)

    # node relations
    primary_node = models.ForeignKey('Node', related_name='primary_vms',
                                     null=True, blank=True)
    secondary_node = models.ForeignKey('Node', related_name='secondary_vms',
                                       null=True, blank=True)

    # The last job reference indicates that there is at least one pending job
    # for this virtual machine. There may be more than one job, and that can
    # never be prevented. This just indicates that job(s) are pending and the
    # job related code should be run (status, cleanup, etc).
    last_job = models.ForeignKey('Job', related_name="+", null=True,
                                 blank=True)

    # deleted flag indicates a VM is being deleted, but the job has not
    # completed yet. VMs that have pending_delete are still displayed in
    # lists and counted in quotas, but only so status can be checked.
    pending_delete = models.BooleanField(default=False)
    deleted = False

    # Template temporarily stores parameters used to create this virtual
    # machine. This template is used to recreate the values entered into the
    # form.
    template = models.ForeignKey("VirtualMachineTemplate",
                                 related_name="instances", null=True,
                                 blank=True)

    class Meta:
        ordering = ["hostname"]
        unique_together = (("cluster", "hostname"),)

    def __unicode__(self):
        return self.hostname

    def save(self, *args, **kwargs):
        """
        sets the cluster_hash for newly saved instances
        """
        if self.id is None:
            self.cluster_hash = self.cluster.hash

        info_ = self.info
        if info_:
            found = False
            remove = []
            if self.cluster.username:
                for tag in info_['tags']:
                    # Update owner Tag. Make sure the tag is set to the owner
                    # that is set in webmgr.
                    if tag.startswith(constants.OWNER_TAG):
                        id = int(tag[len(constants.OWNER_TAG):])
                        # Since there is no 'update tag' delete old tag and
                        # replace with tag containing correct owner id.
                        if id == self.owner_id:
                            found = True
                        else:
                            remove.append(tag)
                if remove:
                    self.rapi.DeleteInstanceTags(self.hostname, remove)
                    for tag in remove:
                        info_['tags'].remove(tag)
                if self.owner_id and not found:
                    tag = '%s%s' % (constants.OWNER_TAG, self.owner_id)
                    self.rapi.AddInstanceTags(self.hostname, [tag])
                    self.info['tags'].append(tag)

        super(VirtualMachine, self).save(*args, **kwargs)

    @models.permalink
    def get_absolute_url(self):
        """
        Return absolute url for this instance.
        """
        return 'instance-detail', (), {'cluster_slug': self.cluster.slug,
                                       'instance': self.hostname}

    @property
    def rapi(self):
        return get_rapi(self.cluster_hash, self.cluster_id)

    @property
    def is_running(self):
        return self.status == 'running'

    @classmethod
    def parse_persistent_info(cls, info):
        """
        Loads all values from cached info, including persistent properties
        that are stored in the database
        """
        data = super(VirtualMachine, cls).parse_persistent_info(info)

        # Parse resource properties
        data['ram'] = info['beparams']['memory']
        data['virtual_cpus'] = info['beparams']['vcpus']
        # Sum up the size of each disk used by the VM
        disk_size = 0
        for disk in info['disk.sizes']:
            disk_size += disk
        data['disk_size'] = disk_size
        data['operating_system'] = info['os']
        data['status'] = info['status']

        primary = info['pnode']
        if primary:
            try:
                data['primary_node'] = Node.objects.get(hostname=primary)
            except Node.DoesNotExist:
                # node is not created yet. fail silently
                data['primary_node'] = None
        else:
            data['primary_node'] = None

        secondary = info['snodes']
        if len(secondary):
            secondary = secondary[0]
            try:
                data['secondary_node'] = Node.objects.get(hostname=secondary)
            except Node.DoesNotExist:
                # node is not created yet. fail silently
                data['secondary_node'] = None
        else:
            data['secondary_node'] = None

        return data

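    # Shape of the RAPI info consumed above (illustrative values only):
    #   {'beparams': {'memory': 512, 'vcpus': 2},
    #    'disk.sizes': [10240, 2048], 'os': 'image+debian',
    #    'status': 'running', 'pnode': 'node1.example.org',
    #    'snodes': ['node2.example.org']}
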
    @classmethod
    def _complete_job(cls, cluster_id, hostname, op, status):
        """
        if the cache bypass is enabled then check the status of the last job.
        When the job is complete we can re-enable the cache.

        @returns - dictionary of values that were updated
        """
        if status == 'unknown':
            # unknown status, the job was archived before its final status
            # was polled. Impossible to tell what happened. Clear the job
            # so it is no longer polled.
            #
            # XXX This VM might be added by the CLI and be in an invalid
            # pending_delete state. clearing pending_delete prevents this
            # but will result in "missing" vms in some cases.
            return dict(pending_delete=False)

        base = VirtualMachine.objects.filter(cluster=cluster_id,
                                             hostname=hostname)
        if op == 'OP_INSTANCE_REMOVE':
            if status == 'success':
                # XXX can't actually delete here since it would cause a
                # recursive loop
                return dict(deleted=True)

        elif op == 'OP_INSTANCE_CREATE' and status == 'success':
            # XXX must update before deleting the template to maintain
            # referential integrity. as a consequence return no other
            # updates.
            base.update(template=None)
            VirtualMachineTemplate.objects \
                .filter(instances__hostname=hostname,
                        instances__cluster=cluster_id) \
                .delete()
            return dict(template=None)
        return

    def _refresh(self):
        # XXX if delete is pending then no need to refresh this object.
        if self.pending_delete or self.template_id:
            return None
        return self.rapi.GetInstance(self.hostname)

    def shutdown(self, timeout=None):
        if timeout is None:
            id = self.rapi.ShutdownInstance(self.hostname)
        else:
            id = self.rapi.ShutdownInstance(self.hostname, timeout=timeout)

        job = Job.objects.create(job_id=id, obj=self,
                                 cluster_id=self.cluster_id)
        self.last_job = job
        VirtualMachine.objects.filter(pk=self.id) \
            .update(last_job=job, ignore_cache=True)
        return job

    def startup(self):
        id = self.rapi.StartupInstance(self.hostname)
        job = Job.objects.create(job_id=id, obj=self,
                                 cluster_id=self.cluster_id)
        self.last_job = job
        VirtualMachine.objects.filter(pk=self.id) \
            .update(last_job=job, ignore_cache=True)
        return job

    def reboot(self):
        id = self.rapi.RebootInstance(self.hostname)
        job = Job.objects.create(job_id=id, obj=self,
                                 cluster_id=self.cluster_id)
        self.last_job = job
        VirtualMachine.objects.filter(pk=self.id) \
            .update(last_job=job, ignore_cache=True)
        return job

    def migrate(self, mode='live', cleanup=False):
        """
        Migrates this VirtualMachine to another node.

        Only works if the disk type is DRBD.

        @param mode: live or non-live
        @param cleanup: clean up a previous migration, default is False
        """
        id = self.rapi.MigrateInstance(self.hostname, mode, cleanup)
        job = Job.objects.create(job_id=id, obj=self,
                                 cluster_id=self.cluster_id)
        self.last_job = job
        VirtualMachine.objects.filter(pk=self.id) \
            .update(last_job=job, ignore_cache=True)
        return job

    def replace_disks(self, mode=REPLACE_DISK_AUTO, disks=None, node=None,
                      iallocator=None):
        id = self.rapi.ReplaceInstanceDisks(self.hostname, disks, mode, node,
                                            iallocator)
        job = Job.objects.create(job_id=id, obj=self,
                                 cluster_id=self.cluster_id)
        self.last_job = job
        VirtualMachine.objects.filter(pk=self.id) \
            .update(last_job=job, ignore_cache=True)
        return job

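    # Usage sketch for the job-dispatching methods above (assuming a saved
    # VirtualMachine `vm`): each submits a RAPI job, records it as last_job,
    # and sets ignore_cache so status is re-polled until the job finishes.
    #
    #   job = vm.reboot()
    #   job.status   # refreshed lazily through the Job cache machinery
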
    def setup_ssh_forwarding(self, sport=0):
        """
        Poke a proxy to start SSH forwarding.

        Returns None if no proxy is configured, or if there was an error
        contacting the proxy.
        """
        command = self.rapi.GetInstanceConsole(self.hostname)["command"]

        if settings.VNC_PROXY:
            proxy_server = settings.VNC_PROXY.split(":")
            password = generate_random_password()
            sport = request_ssh(proxy_server, sport, self.info["pnode"],
                                self.info["network_port"], password, command)

            if sport:
                return proxy_server[0], sport, password

    def setup_vnc_forwarding(self, sport=0, tls=False):
        """
        Obtain VNC forwarding information, optionally configuring a proxy.

        Returns None if a proxy is configured and there was an error
        contacting the proxy.
        """
        password = ''
        info_ = self.info
        port = info_['network_port']
        node = info_['pnode']

        # use proxy for VNC connection
        if settings.VNC_PROXY:
            proxy_server = settings.VNC_PROXY.split(":")
            password = generate_random_password()
            result = request_forwarding(proxy_server, node, port, password,
                                        sport=sport, tls=tls)
            if result:
                return proxy_server[0], int(result), password
        else:
            return node, port, password

    def __repr__(self):
        return "<VirtualMachine: '%s'>" % self.hostname

class Node(CachedClusterObject):
    """
    The Node model represents nodes within a Ganeti cluster.

    The majority of properties are a cache for data stored in the cluster.
    All data retrieved via the RAPI is stored in Node.info, and serialized
    automatically into Node.serialized_info.

    Attributes that need to be searchable should be stored as model fields.
    All other attributes will be stored within Node.info.
    """

    ROLE_CHOICES = ((k, v) for k, v in constants.NODE_ROLE_MAP.items())

    cluster = models.ForeignKey('Cluster', related_name='nodes')
    hostname = models.CharField(max_length=128, unique=True)
    cluster_hash = models.CharField(max_length=40, editable=False)
    offline = models.BooleanField()
    role = models.CharField(max_length=1, choices=ROLE_CHOICES)
    ram_total = models.IntegerField(default=-1)
    ram_free = models.IntegerField(default=-1)
    disk_total = models.IntegerField(default=-1)
    disk_free = models.IntegerField(default=-1)
    cpus = models.IntegerField(null=True, blank=True)

    # The last job reference indicates that there is at least one pending job
    # for this node. There may be more than one job, and that can never be
    # prevented. This just indicates that job(s) are pending and the job
    # related code should be run (status, cleanup, etc).
    last_job = models.ForeignKey('Job', related_name="+", null=True,
                                 blank=True)

    def __unicode__(self):
        return self.hostname

    def save(self, *args, **kwargs):
        """
        sets the cluster_hash for newly saved instances
        """
        if self.id is None:
            self.cluster_hash = self.cluster.hash
        super(Node, self).save(*args, **kwargs)

    @models.permalink
    def get_absolute_url(self):
        """
        Return absolute url for this node.
        """
        return 'node-detail', (), {'cluster_slug': self.cluster.slug,
                                   'host': self.hostname}

    def _refresh(self):
        """ returns node info from the ganeti server """
        return self.rapi.GetNode(self.hostname)

    @property
    def rapi(self):
        return get_rapi(self.cluster_hash, self.cluster_id)

    @classmethod
    def parse_persistent_info(cls, info):
        """
        Loads all values from cached info, including persistent properties
        that are stored in the database
        """
        data = super(Node, cls).parse_persistent_info(info)

        # Parse resource properties
        data['ram_total'] = info.get("mtotal") or 0
        data['ram_free'] = info.get("mfree") or 0
        data['disk_total'] = info.get("dtotal") or 0
        data['disk_free'] = info.get("dfree") or 0
        data['cpus'] = info.get("csockets")
        data['offline'] = info['offline']
        data['role'] = info['role']
        return data

    @property
    def ram(self):
        """ returns dict of free and total ram """
        values = VirtualMachine.objects \
            .filter(Q(primary_node=self) | Q(secondary_node=self)) \
            .filter(status='running') \
            .exclude(ram=-1).order_by() \
            .aggregate(used=Sum('ram'))

        total = self.ram_total
        used = total - self.ram_free
        allocated = values.get("used") or 0
        free = total - allocated if allocated >= 0 and total >= 0 else -1

        return {
            'total': total,
            'free': free,
            'allocated': allocated,
            'used': used,
        }

    @property
    def disk(self):
        """ returns dict of free and total disk space """
        values = VirtualMachine.objects \
            .filter(Q(primary_node=self) | Q(secondary_node=self)) \
            .exclude(disk_size=-1).order_by() \
            .aggregate(used=Sum('disk_size'))

        total = self.disk_total
        used = total - self.disk_free
        allocated = values.get("used") or 0
        free = total - allocated if allocated >= 0 and total >= 0 else -1

        return {
            'total': total,
            'free': free,
            'allocated': allocated,
            'used': used,
        }

    @property
    def allocated_cpus(self):
        values = VirtualMachine.objects \
            .filter(primary_node=self, status='running') \
            .exclude(virtual_cpus=-1).order_by() \
            .aggregate(cpus=Sum('virtual_cpus'))
        return values.get("cpus") or 0

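    # Example shapes for the properties above (illustrative numbers):
    #   node.ram -> {'total': 4096, 'free': 1024,
    #                'allocated': 2048, 'used': 3072}
    # 'used' is what the node itself reports; 'allocated' sums the running
    # VMs assigned to it, so the two can legitimately disagree.
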
    def set_role(self, role, force=False):
        """
        Sets the role for this node

        @param role - one of the following choices:
            * master
            * master-candidate
            * regular
            * drained
            * offline
        """
        id = self.rapi.SetNodeRole(self.hostname, role, force)
        job = Job.objects.create(job_id=id, obj=self,
                                 cluster_id=self.cluster_id)
        self.last_job = job
        Node.objects.filter(pk=self.pk).update(ignore_cache=True, last_job=job)
        return job

    def evacuate(self, iallocator=None, node=None):
        """
        migrates all secondary instances off this node
        """
        id = self.rapi.EvacuateNode(self.hostname, iallocator=iallocator,
                                    remote_node=node)
        job = Job.objects.create(job_id=id, obj=self,
                                 cluster_id=self.cluster_id)
        self.last_job = job
        Node.objects.filter(pk=self.pk) \
            .update(ignore_cache=True, last_job=job)
        return job

    def migrate(self, mode=None):
        """
        migrates all primary instances off this node
        """
        id = self.rapi.MigrateNode(self.hostname, mode)
        job = Job.objects.create(job_id=id, obj=self,
                                 cluster_id=self.cluster_id)
        self.last_job = job
        Node.objects.filter(pk=self.pk).update(ignore_cache=True, last_job=job)
        return job

    def __repr__(self):
        return "<Node: '%s'>" % self.hostname

class Cluster(CachedClusterObject):
    """
    A Ganeti cluster that is being tracked by this manager tool
    """
    hostname = models.CharField(_('hostname'), max_length=128, unique=True)
    slug = models.SlugField(_('slug'), max_length=50, unique=True,
                            db_index=True)
    port = models.PositiveIntegerField(_('port'), default=5080)
    description = models.CharField(_('description'), max_length=128,
                                   blank=True)
    username = models.CharField(_('username'), max_length=128, blank=True)
    password = PatchedEncryptedCharField(_('password'), default="",
                                         max_length=128, blank=True)
    hash = models.CharField(_('hash'), max_length=40, editable=False)

    # quota properties
    virtual_cpus = models.IntegerField(_('Virtual CPUs'), null=True,
                                       blank=True)
    disk = models.IntegerField(_('disk'), null=True, blank=True)
    ram = models.IntegerField(_('ram'), null=True, blank=True)

    # The last job reference indicates that there is at least one pending job
    # for this cluster. There may be more than one job, and that can never be
    # prevented. This just indicates that job(s) are pending and the job
    # related code should be run (status, cleanup, etc).
    last_job = models.ForeignKey('Job', related_name='cluster_last_job',
                                 null=True, blank=True)

    class Meta:
        ordering = ["hostname", "description"]

    def __unicode__(self):
        return self.hostname

    def save(self, *args, **kwargs):
        self.hash = self.create_hash()
        super(Cluster, self).save(*args, **kwargs)

    @models.permalink
    def get_absolute_url(self):
        return 'cluster-detail', (), {'cluster_slug': self.slug}

    # XXX probably hax
    @property
    def cluster_id(self):
        return self.id

    @classmethod
    def decrypt_password(cls, value):
        """
        Convenience method for decrypting a password without an instance.
        This was partly cribbed from django-fields which only allows
        decrypting from a model instance.

        If the password appears to be encrypted, this method will decrypt it;
        otherwise, it will return the password unchanged.

        This method is bonghits.
        """
        field, chaff, chaff, chaff = cls._meta.get_field_by_name('password')

        if value.startswith(field.prefix):
            ciphertext = value[len(field.prefix):]
            plaintext = field.cipher.decrypt(binascii.a2b_hex(ciphertext))
            password = plaintext.split('\0')[0]
        else:
            password = value

        return force_unicode(password)

    @property
    def rapi(self):
        """
        retrieves the rapi client for this cluster.
        """
        # XXX always pass self in. not only does it avoid querying this
        # object from the DB a second time, it also prevents a recursion
        # loop caused by __init__ fetching info from the Cluster
        return get_rapi(self.hash, self)

    def create_hash(self):
        """
        Creates a hash for this cluster based on credentials required for
        connecting to the server
        """
        s = '%s%s%s%s' % (self.username, self.password, self.hostname,
                          self.port)
        return sha1(s).hexdigest()

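    # Sketch: since the hash is a SHA-1 over username/password/hostname/port,
    # editing any credential yields a new hash, which keys a fresh client in
    # RAPI_CACHE (see get_rapi) and is propagated to related objects by the
    # update_cluster_hash signal handler at the bottom of this module.
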
    def get_default_quota(self):
        """
        Returns the default quota for this cluster
        """
        return {
            "default": 1,
            "ram": self.ram,
            "disk": self.disk,
            "virtual_cpus": self.virtual_cpus,
        }

    def get_quota(self, user=None):
        """
        Get the quota for a ClusterUser

        @return user's quota, default quota, or none
        """
        if user is None:
            return self.get_default_quota()

        # attempt to query user specific quota first. if it does not exist
        # then fall back to the default quota
        query = Quota.objects.filter(cluster=self, user=user)
        quotas = query.values('ram', 'disk', 'virtual_cpus')
        if quotas:
            quota = quotas[0]
            quota['default'] = 0
            return quota

        return self.get_default_quota()

    def set_quota(self, user, data):
        """
        Set the quota for a ClusterUser.

        If data is None, the quota will be removed.

        @param data: dictionary of values, or None to delete the quota
        """
        kwargs = {'cluster': self, 'user': user}
        if data is None:
            Quota.objects.filter(**kwargs).delete()
        else:
            quota, new = Quota.objects.get_or_create(**kwargs)
            quota.__dict__.update(data)
            quota.save()

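    # Usage sketch (hypothetical cluster_user object): set, read, remove.
    #
    #   cluster.set_quota(cluster_user, {'ram': 2048, 'disk': 80,
    #                                    'virtual_cpus': 2})
    #   cluster.get_quota(cluster_user)         # custom values, 'default': 0
    #   cluster.set_quota(cluster_user, None)   # back to cluster defaults
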
    @classmethod
    def get_quotas(cls, clusters=None, user=None):
        """ retrieve a bulk list of cluster quotas """
        if clusters is None:
            clusters = Cluster.objects.all()

        quotas = {}
        cluster_id_map = {}
        for cluster in clusters:
            quotas[cluster] = {
                'default': 1,
                'ram': cluster.ram,
                'disk': cluster.disk,
                'virtual_cpus': cluster.virtual_cpus,
            }
            cluster_id_map[cluster.id] = cluster

        # get the user's custom quotas, if any
        if user is not None:
            qs = Quota.objects.filter(cluster__in=clusters, user=user)
            values = qs.values('ram', 'disk', 'virtual_cpus', 'cluster__id')

            for custom in values:
                try:
                    cluster = cluster_id_map[custom['cluster__id']]
                except KeyError:
                    continue
                custom['default'] = 0
                del custom['cluster__id']
                quotas[cluster] = custom

        return quotas

    def sync_virtual_machines(self, remove=False):
        """
        Synchronizes the VirtualMachines in the database with the information
        this ganeti cluster has:
            * VMs no longer in ganeti are deleted
            * VMs missing from the database are added
        """
        ganeti = self.instances()
        db = self.virtual_machines.all().values_list('hostname', flat=True)

        # add VMs missing from the database
        for hostname in filter(lambda x: unicode(x) not in db, ganeti):
            vm = VirtualMachine.objects.create(cluster=self,
                                               hostname=hostname)
            vm.refresh()

        # deletes VMs that are no longer in ganeti
        if remove:
            missing_ganeti = filter(lambda x: str(x) not in ganeti, db)
            if missing_ganeti:
                self.virtual_machines \
                    .filter(hostname__in=missing_ganeti).delete()

    def sync_nodes(self, remove=False):
        """
        Synchronizes the Nodes in the database with the information
        this ganeti cluster has:
            * Nodes no longer in ganeti are deleted
            * Nodes missing from the database are added
        """
        ganeti = self.rapi.GetNodes()
        db = self.nodes.all().values_list('hostname', flat=True)

        # add Nodes missing from the database
        for hostname in filter(lambda x: unicode(x) not in db, ganeti):
            node = Node.objects.create(cluster=self, hostname=hostname)
            node.refresh()

        # deletes Nodes that are no longer in ganeti
        if remove:
            missing_ganeti = filter(lambda x: str(x) not in ganeti, db)
            if missing_ganeti:
                self.nodes.filter(hostname__in=missing_ganeti).delete()

    @property
    def missing_in_ganeti(self):
        """
        Returns a list of VirtualMachines that are missing from the Ganeti
        cluster but present in the database.
        """
        ganeti = self.instances()
        qs = self.virtual_machines.exclude(template__isnull=False)
        db = qs.values_list('hostname', flat=True)
        return [x for x in db if str(x) not in ganeti]

    @property
    def missing_in_db(self):
        """
        Returns list of VirtualMachines that are missing from the database,
        but present in ganeti
        """
        ganeti = self.instances()
        db = self.virtual_machines.all().values_list('hostname', flat=True)
        return [x for x in ganeti if unicode(x) not in db]

    @property
    def nodes_missing_in_db(self):
        """
        Returns list of Nodes that are missing from the database, but present
        in ganeti.
        """
        try:
            ganeti = self.rapi.GetNodes()
        except GanetiApiError:
            ganeti = []
        db = self.nodes.all().values_list('hostname', flat=True)
        return [x for x in ganeti if unicode(x) not in db]

    @property
    def nodes_missing_in_ganeti(self):
        """
        Returns list of Nodes that are missing from the ganeti cluster
        but present in the database
        """
        try:
            ganeti = self.rapi.GetNodes()
        except GanetiApiError:
            ganeti = []
        db = self.nodes.all().values_list('hostname', flat=True)
        return filter(lambda x: str(x) not in ganeti, db)

    @property
    def available_ram(self):
        """ returns dict of free and total ram """
        nodes = self.nodes.exclude(ram_total=-1) \
            .aggregate(total=Sum('ram_total'), free=Sum('ram_free'))
        total = max(nodes.get("total", 0), 0)
        free = max(nodes.get("free", 0), 0)
        used = total - free
        values = self.virtual_machines \
            .filter(status='running') \
            .exclude(ram=-1).order_by() \
            .aggregate(used=Sum('ram'))

        if values.get("used") is None:
            allocated = 0
        else:
            allocated = values["used"]

        free = max(total - allocated, 0)

        return {
            'total': total,
            'free': free,
            'allocated': allocated,
            'used': used,
        }

    @property
    def available_disk(self):
        """ returns dict of free and total disk space """
        nodes = self.nodes.exclude(disk_total=-1) \
            .aggregate(total=Sum('disk_total'), free=Sum('disk_free'))
        total = max(nodes.get("total", 0), 0)
        free = max(nodes.get("free", 0), 0)
        used = total - free
        values = self.virtual_machines \
            .exclude(disk_size=-1).order_by() \
            .aggregate(used=Sum('disk_size'))

        if values.get("used") is None:
            allocated = 0
        else:
            allocated = values["used"]

        free = max(total - allocated, 0)

        return {
            'total': total,
            'free': free,
            'allocated': allocated,
            'used': used,
        }

    def _refresh(self):
        return self.rapi.GetInfo()

    def instances(self, bulk=False):
        """Gets all VMs which reside under the Cluster
        Calls the rapi client for all instances.
        """
        try:
            return self.rapi.GetInstances(bulk=bulk)
        except GanetiApiError:
            return []

    def instance(self, instance):
        """Get a single Instance
        Calls the rapi client for a specific instance.
        """
        try:
            return self.rapi.GetInstance(instance)
        except GanetiApiError:
            return None

    def redistribute_config(self):
        """
        Redistribute config from cluster's master node to all
        other nodes.
        """
        # no exception handling, because it's being done in a view
        id = self.rapi.RedistributeConfig()
        job = Job.objects.create(job_id=id, obj=self, cluster_id=self.id)
        self.last_job = job
        Cluster.objects.filter(pk=self.id) \
            .update(last_job=job, ignore_cache=True)
        return job

class VirtualMachineTemplate(models.Model):
    """
    Virtual Machine Template holds all the values for the create virtual
    machine form so that they can automatically be used or edited by a user.
    """
    template_name = models.CharField(max_length=255, default="")
    temporary = BooleanField(verbose_name=_("Temporary"), default=False)
    description = models.CharField(max_length=255, default="")
    cluster = models.ForeignKey(Cluster, related_name="templates", null=True,
                                blank=True)
    start = models.BooleanField(verbose_name=_('Start up After Creation'),
                                default=True)
    no_install = models.BooleanField(verbose_name=_('Do not install OS'),
                                     default=False)
    ip_check = BooleanField(verbose_name=_("IP Check"), default=True)
    name_check = models.BooleanField(verbose_name=_('DNS Name Check'),
                                     default=True)
    iallocator = models.BooleanField(verbose_name=_('Automatic Allocation'),
                                     default=False)
    iallocator_hostname = models.CharField(max_length=255, blank=True)
    disk_template = models.CharField(verbose_name=_('Disk Template'),
                                     max_length=16)
    # XXX why aren't these FKs?
    pnode = models.CharField(verbose_name=_('Primary Node'), max_length=255,
                             default="")
    snode = models.CharField(verbose_name=_('Secondary Node'),
                             max_length=255, default="")
    os = models.CharField(verbose_name=_('Operating System'), max_length=255)

    # Backend parameters (BEPARAMS)
    vcpus = models.IntegerField(verbose_name=_('Virtual CPUs'),
                                validators=[MinValueValidator(1)], null=True,
                                blank=True)
    # XXX do we really want the minimum memory to be 100MiB? This isn't
    # strictly necessary AFAICT.
    memory = models.IntegerField(verbose_name=_('Memory'),
                                 validators=[MinValueValidator(100)],
                                 null=True, blank=True)
    minmem = models.IntegerField(verbose_name=_('Minimum Memory'),
                                 validators=[MinValueValidator(100)],
                                 null=True, blank=True)
    disks = PickleField(verbose_name=_('Disks'), null=True, blank=True)
    # XXX why isn't this an enum?
    disk_type = models.CharField(verbose_name=_('Disk Type'), max_length=255,
                                 default="")
    nics = PickleField(verbose_name=_('NICs'), null=True, blank=True)
    # XXX why isn't this an enum?
    nic_type = models.CharField(verbose_name=_('NIC Type'), max_length=255,
                                default="")

    # Hypervisor parameters (HVPARAMS)
    kernel_path = models.CharField(verbose_name=_('Kernel Path'),
                                   max_length=255, default="", blank=True)
    root_path = models.CharField(verbose_name=_('Root Path'), max_length=255,
                                 default='/', blank=True)
    serial_console = models.BooleanField(
        verbose_name=_('Enable Serial Console'))
    boot_order = models.CharField(verbose_name=_('Boot Device'),
                                  max_length=255, default="")
    cdrom_image_path = models.CharField(verbose_name=_('CD-ROM Image Path'),
                                        max_length=512, blank=True)
    cdrom2_image_path = models.CharField(
        verbose_name=_('CD-ROM 2 Image Path'),
        max_length=512, blank=True)

    class Meta:
        unique_together = (("cluster", "template_name"),)

    def __unicode__(self):
        if self.temporary:
            return u'(temporary)'
        else:
            return self.template_name

    def set_name(self, name):
        """
        Set this template's name.

        If the name is blank, this template will become temporary and its
        name will be set to a unique timestamp.
        """
        if name:
            self.template_name = name
        else:
            # The template is temporary and will be removed by the VM when
            # the VM successfully comes into existence.
            self.temporary = True
            # Give it a temporary name. Something unique. This is the number
            # of microseconds since the epoch; I figure that it'll work out
            # alright.
            self.template_name = str(int(time.time() * (10 ** 6)))

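# Naming sketch: set_name('') marks a template temporary and names it with a
# microsecond timestamp such as '1325376000000000' (illustrative value),
# which stays unique under the ("cluster", "template_name") constraint.
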
class GanetiError(models.Model):
    """
    Class for storing errors which occurred in Ganeti
    """
    cluster = models.ForeignKey(Cluster, related_name="errors")
    msg = models.TextField()
    code = models.PositiveIntegerField(blank=True, null=True)

    # XXX could be fixed with django-model-util's TimeStampedModel
    timestamp = models.DateTimeField()

    # determines whether the error still appears or not
    cleared = models.BooleanField(default=False)

    # cluster object (cluster, VM, Node) affected by the error (if any)
    obj_type = models.ForeignKey(ContentType, related_name="ganeti_errors")
    obj_id = models.PositiveIntegerField()
    obj = GenericForeignKey("obj_type", "obj_id")

    objects = QuerySetManager()

    class Meta:
        ordering = ("-timestamp", "code", "msg")

    def __unicode__(self):
        base = u"[%s] %s" % (self.timestamp, self.msg)
        return base

    class QuerySet(QuerySet):

        def clear_errors(self, obj=None):
            """
            Clear errors instead of deleting them.
            """
            qs = self.filter(cleared=False)

            if obj:
                qs = qs.get_errors(obj)

            return qs.update(cleared=True)

        def get_errors(self, obj):
            """
            Manager method used for getting QuerySet of all errors depending
            on passed arguments.

            @param obj affected object (itself or just QuerySet)
            """
            if obj is None:
                raise RuntimeError("Implementation error calling"
                                   " get_errors() with None")

            # Create base query of errors to return.
            #
            # if it's a Cluster or a queryset for Clusters, then we need to
            # get all errors from the Clusters. Do this by filtering on
            # GanetiError.cluster instead of obj_id.
            if isinstance(obj, (Cluster,)):
                return self.filter(cluster=obj)

            elif isinstance(obj, (QuerySet,)):
                if obj.model == Cluster:
                    return self.filter(cluster__in=obj)
                else:
                    ct = ContentType.objects.get_for_model(obj.model)
                    return self.filter(obj_type=ct, obj_id__in=obj)

            else:
                ct = ContentType.objects.get_for_model(obj.__class__)
                return self.filter(obj_type=ct, obj_id=obj.pk)

    def __repr__(self):
        return "<GanetiError '%s'>" % self.msg

    @classmethod
    def store_error(cls, msg, obj, code, **kwargs):
        """
        Create and save an error with the given information.

        @param msg error's message
        @param obj object (i.e. cluster or vm) affected by the error
        @param code error's code number
        """
        ct = ContentType.objects.get_for_model(obj.__class__)
        is_cluster = isinstance(obj, Cluster)

        # 401 -- bad permissions
        # 401 is a cluster-specific error and thus shouldn't appear on any
        # other object.
        if code == 401:
            if not is_cluster:
                # NOTE: what we do here is almost like:
                #   return self.store_error(msg=msg, code=code,
                #                           obj=obj.cluster)
                # we just omit the recursiveness
                obj = obj.cluster
                ct = ContentType.objects.get_for_model(Cluster)
                is_cluster = True

        # 404 -- object not found
        # 404 can occur on any object, but when it occurs on a cluster, then
        # any of its children must not see the error again
        elif code == 404:
            if not is_cluster:
                # return if the error exists for cluster
                try:
                    c_ct = ContentType.objects.get_for_model(Cluster)
                    return cls.objects.filter(msg=msg, obj_type=c_ct,
                                              code=code,
                                              obj_id=obj.cluster_id,
                                              cleared=False)[0]

                except (cls.DoesNotExist, IndexError):
                    # we want to proceed when the error is not
                    # cluster-specific
                    pass

        # XXX use a try/except instead of get_or_create(). get_or_create()
        # does not allow us to set cluster_id. This means we'd have to query
        # the cluster object to create the error. we can't guarantee the
        # cluster will already be queried so use create() instead which does
        # allow cluster_id
        try:
            return cls.objects.filter(msg=msg, obj_type=ct, obj_id=obj.pk,
                                      code=code, **kwargs)[0]

        except (cls.DoesNotExist, IndexError):
            cluster_id = obj.pk if is_cluster else obj.cluster_id

            return cls.objects.create(timestamp=datetime.now(), msg=msg,
                                      obj_type=ct, obj_id=obj.pk,
                                      cluster_id=cluster_id, code=code,
                                      **kwargs)

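# Usage sketch for error bookkeeping (hypothetical vm object):
#
#   err = GanetiError.store_error('connection refused', obj=vm, code=None)
#   GanetiError.objects.clear_errors(obj=vm)   # marks cleared, not deleted
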
class ClusterUser(models.Model):
    """
    Base class for objects that may interact with a Cluster or
    VirtualMachine.
    """
    name = models.CharField(max_length=128)
    real_type = models.ForeignKey(ContentType, related_name="+",
                                  editable=False, null=True, blank=True)

    def __unicode__(self):
        return self.name

    def save(self, *args, **kwargs):
        if not self.id:
            self.real_type = self._get_real_type()
        super(ClusterUser, self).save(*args, **kwargs)

    @property
    def permissable(self):
        """ returns an object that can be granted permissions """
        return self.cast().permissable

    @classmethod
    def _get_real_type(cls):
        return ContentType.objects.get_for_model(cls)

    def cast(self):
        return self.real_type.get_object_for_this_type(pk=self.pk)

    def used_resources(self, cluster=None, only_running=True):
        """
        Return dictionary of total resources used by VMs that this
        ClusterUser has perms to.
        @param cluster if set, get only VMs from specified cluster
        @param only_running if set, get only running VMs
        """
        # XXX - order_by must be cleared or it breaks annotation grouping
        # since the default order_by field is also added to the group_by
        # clause
        base = self.virtual_machines.all().order_by()

        # XXX - use a custom aggregate for ram and vcpu count when filtering
        # by running. this allows us to execute a single query.
        #
        # XXX - quotes must be used in this order. postgresql quirk
        if only_running:
            sum_ram = SumIf('ram', condition="status='running'")
            sum_vcpus = SumIf('virtual_cpus', condition="status='running'")
        else:
            sum_ram = Sum('ram')
            sum_vcpus = Sum('virtual_cpus')

        base = base.exclude(ram=-1, disk_size=-1, virtual_cpus=-1)

        if cluster:
            base = base.filter(cluster=cluster)
            result = base.aggregate(ram=sum_ram, disk=Sum('disk_size'),
                                    virtual_cpus=sum_vcpus)

            # repack with zeros instead of Nones
            if result['disk'] is None:
                result['disk'] = 0
            if result['ram'] is None:
                result['ram'] = 0
            if result['virtual_cpus'] is None:
                result['virtual_cpus'] = 0
            return result

        else:
            base = base.values('cluster').annotate(uram=sum_ram,
                                                   udisk=Sum('disk_size'),
                                                   uvirtual_cpus=sum_vcpus)

            # repack as dictionary
            result = {}
            for used in base:
                # repack with zeros instead of Nones, change index names
                used["ram"] = used.pop("uram") or 0
                used["disk"] = used.pop("udisk") or 0
                used["virtual_cpus"] = used.pop("uvirtual_cpus") or 0
                result[used.pop('cluster')] = used

            return result

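# Return shapes for used_resources above (illustrative values):
#   cu.used_resources(cluster=c)  # {'ram': 1024, 'disk': 40,
#                                 #  'virtual_cpus': 2}
#   cu.used_resources()           # keyed by cluster id, each value a dict
#                                 # with the same three keys
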
class Profile(ClusterUser):
    """
    Profile associated with a django.contrib.auth.User object.
    """
    user = models.OneToOneField(User)

    @models.permalink
    def get_absolute_url(self):
        return ('muddle_users.views.user')

    def grant(self, perm, obj):
        self.user.grant(perm, obj)

    def set_perms(self, perms, obj):
        self.user.set_perms(perms, obj)

    def get_objects_any_perms(self, *args, **kwargs):
        return self.user.get_objects_any_perms(*args, **kwargs)

    def has_perm(self, *args, **kwargs):
        return self.user.has_perm(*args, **kwargs)

    @property
    def permissable(self):
        """ returns an object that can be granted permissions """
        return self.user

class Organization(ClusterUser):
    """
    An organization is used for grouping Users.

    Organizations are matched with an instance of contrib.auth.models.Group.
    This model exists so that contrib.auth.models.Group have a 1:1 relation
    with a ClusterUser on which quotas and permissions can be assigned.
    """
    group = models.OneToOneField(Group, related_name='organization')

    def grant(self, perm, object):
        self.group.grant(perm, object)

    def set_perms(self, perms, object):
        self.group.set_perms(perms, object)

    def get_objects_any_perms(self, *args, **kwargs):
        return self.group.get_objects_any_perms(*args, **kwargs)

    def has_perm(self, *args, **kwargs):
        return self.group.has_perm(*args, **kwargs)

    @property
    def permissable(self):
        """ returns an object that can be granted permissions """
        return self.group

class Quota(models.Model):
    """
    A resource limit imposed on a ClusterUser for a given Cluster. The
    attributes of this model represent maximum values the ClusterUser can
    consume. The absence of a Quota indicates unlimited usage.
    """
    user = models.ForeignKey(ClusterUser, related_name='quotas')
    cluster = models.ForeignKey(Cluster, related_name='quotas')

    ram = models.IntegerField(default=0, null=True, blank=True)
    disk = models.IntegerField(default=0, null=True, blank=True)
    virtual_cpus = models.IntegerField(default=0, null=True, blank=True)

class SSHKey(models.Model):
    """
    Model representing a user's SSH public key. Virtual machines rely on
    many ssh keys.
    """
    key = models.TextField(validators=[validate_sshkey])
    #filename = models.CharField(max_length=128) # saves key file's name
    user = models.ForeignKey(User, related_name='ssh_keys')

def create_profile(sender, instance, **kwargs):
    """
    Create a profile object whenever a new user is created, also keeps the
    profile name synchronized with the username
    """
    try:
        profile, new = Profile.objects.get_or_create(user=instance)
        if profile.name != instance.username:
            profile.name = instance.username
            profile.save()
    except DatabaseError:
        # XXX - since we're using south to track migrations the Profile table
        # won't be available the first time syncdb is run. Catch the error
        # here and let the south migration handle it.
        pass

def update_cluster_hash(sender, instance, **kwargs):
    """
    Updates the Cluster hash for all of its VirtualMachines, Nodes, and Jobs
    """
    instance.virtual_machines.all().update(cluster_hash=instance.hash)
    instance.jobs.all().update(cluster_hash=instance.hash)
    instance.nodes.all().update(cluster_hash=instance.hash)

def update_organization(sender, instance, **kwargs):
    """
    Creates an Organization whenever a contrib.auth.models.Group is created
    """
    org, new = Organization.objects.get_or_create(group=instance)
    org.name = instance.name
    org.save()

post_save.connect(create_profile, sender=User)
post_save.connect(update_cluster_hash, sender=Cluster)
post_save.connect(update_organization, sender=Group)

# Disconnect create_default_site from django.contrib.sites so that
# the useless table for sites is not created. This will be
# reconnected for other apps to use in update_sites_module.
post_syncdb.disconnect(create_default_site, sender=sites_app)
post_syncdb.connect(management.update_sites_module, sender=sites_app,
                    dispatch_uid="ganeti.management.update_sites_module")

def regenerate_cu_children(sender, **kwargs):
    """
    Resets may destroy Profiles and/or Organizations. We need to regenerate
    them.
    """
    # So. What are we actually doing here?
    # Whenever a User or Group is saved, the associated Profile or
    # Organization is also updated. This means that, if a Profile for a User
    # is absent, it will be created.
    # More importantly, *why* might a Profile be missing? Simple. Resets of
    # the ganeti app destroy them. This shouldn't happen in production, and
    # only occasionally in development, but it's good to explicitly handle
    # this particular case so that missing Profiles not resulting from a
    # reset are easier to diagnose.
    try:
        for user in User.objects.filter(profile__isnull=True):
            user.save()
        for group in Group.objects.filter(organization__isnull=True):
            group.save()
    except DatabaseError:
        # XXX - since we're using south to track migrations the Profile table
        # won't be available the first time syncdb is run. Catch the error
        # here and let the south migration handle it.
        pass

post_syncdb.connect(regenerate_cu_children)

def log_group_create(sender, editor, **kwargs):
    """ log group creation signal """
    log_action('CREATE', editor, sender)


def log_group_edit(sender, editor, **kwargs):
    """ log group edit signal """
    log_action('EDIT', editor, sender)


muddle_user_signals.view_group_created.connect(log_group_create)
muddle_user_signals.view_group_edited.connect(log_group_edit)

def refresh_objects(sender, **kwargs):
    """
    This was originally the code in the 0009
    and then 0010 'force_object_refresh' migration

    Force a refresh of all Cluster, Nodes, and VirtualMachines, and
    import any new Nodes.
    """
    if kwargs.get('app', False) and kwargs['app'] == 'ganeti_web':
        Cluster.objects.all().update(mtime=None)
        Node.objects.all().update(mtime=None)
        VirtualMachine.objects.all().update(mtime=None)

        write = sys.stdout.write
        flush = sys.stdout.flush

        def wf(str, newline=False):
            if newline:
                write('\n')
            write(str)
            flush()

        wf('- Refresh Cached Cluster Objects')
        wf(' > Synchronizing Cluster Nodes ', True)
        flush()
        for cluster in Cluster.objects.all().iterator():
            try:
                cluster.sync_nodes()
                wf('.')
            except GanetiApiError:
                wf('E')

        wf(' > Refreshing Node Caches ', True)
        for node in Node.objects.all().iterator():
            try:
                wf('.')
            except GanetiApiError:
                wf('E')

        wf(' > Refreshing Instance Caches ', True)
        for instance in VirtualMachine.objects.all().iterator():
            try:
                wf('.')
            except GanetiApiError:
                wf('E')
        wf('\n')


# Set this as post_migrate hook.
post_migrate.connect(refresh_objects)

# Register permissions on our models.
# These are part of the DB schema and should not be changed without serious
# forethought.
# You *must* syncdb after you change these.
register(permissions.CLUSTER_PARAMS, Cluster, 'ganeti_web')
register(permissions.VIRTUAL_MACHINE_PARAMS, VirtualMachine, 'ganeti_web')

# register log actions
register_log_actions()