Ticket #3831 (partial) - Rewrite cache updater as twisted application:
[ganeti_webmgr.git] / ganeti / cacher / virtual_machine.py
blob5c6459f07cf40012b06e0da98197ef25a6d3b0bb
1 # Copyright (C) 2010 Oregon State University et al.
3 # This program is free software; you can redistribute it and/or
4 # modify it under the terms of the GNU General Public License
5 # as published by the Free Software Foundation; either version 2
6 # of the License, or (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
16 # USA.
18 import cPickle
19 from datetime import datetime
21 from django.utils import simplejson
22 from twisted.internet import reactor
23 from twisted.internet.defer import DeferredList, Deferred
24 from twisted.web import client
25 from ganeti.cacher import Timer, Counter
26 from ganeti.models import Cluster, VirtualMachine
# URL template for the bulk instance listing requested from a cluster; it is
# filled with (cluster.hostname, cluster.port) in get_cluster_info().
VMS_URL = 'https://%s:%s/2/instances?bulk=1'
class VirtualMachineCacheUpdater(object):
    """
    Refreshes the cached VirtualMachine records of every Cluster.

    Instance data is fetched from each cluster over HTTP and compared with
    the mtime/status values already stored in the database; only records that
    are new or changed are rewritten.  All work is scheduled through the
    twisted reactor and reported through Deferreds.
    """

    def update(self):
        """
        Updates the cache for all VirtualMachines in all clusters.  This
        method processes the data in bulk, where possible, to reduce runtime.
        Generally this should be faster than refreshing individual
        VirtualMachines.

        @return DeferredList that fires, after complete() has run, once every
                cluster has been processed (successfully or not)
        """
        self.timer = Timer()
        print('------[cache update]-------------------------------')
        clusters = Cluster.objects.all()
        deferreds = [self.get_cluster_info(cluster) for cluster in clusters]
        deferred_list = DeferredList(deferreds)
        deferred_list.addCallback(self.complete)
        return deferred_list

    def get_cluster_info(self, cluster):
        """
        fetch cluster info from ganeti

        @param cluster - cluster to fetch the instance list from
        @return Deferred fired when the cluster has been fully processed, or
                errbacked if fetching or processing failed
        """
        deferred = Deferred()
        d = client.getPage(str(VMS_URL % (cluster.hostname, cluster.port)))
        d.addCallback(self.process_cluster_info, cluster, deferred.callback)
        # Without an errback a failed request (or an exception raised while
        # processing the response) would leave `deferred` unfired forever and
        # the DeferredList built in update() would never complete.
        d.addErrback(deferred.errback)
        return deferred

    def process_cluster_info(self, json, cluster, callback):
        """
        process data received from ganeti.

        @param json - raw JSON body returned by the cluster
        @param cluster - cluster the data belongs to
        @param callback - callable fired with the per-VM results once every
                          VM of this cluster has been handled
        @return DeferredList tracking the per-VM updates; returning it lets a
                calling Deferred chain on it and observe late failures
        """
        print('%s:' % cluster.hostname)
        infos = simplejson.loads(json)
        self.timer.tick('info fetched from ganeti ')
        updated = Counter()
        base = cluster.virtual_machines.all()
        mtimes = base.values_list('hostname', 'id', 'mtime', 'status')

        # index the database state by hostname so that each VM reported by
        # ganeti can be compared without another query
        data = {}
        for name, vm_id, mtime, status in mtimes:
            data[name] = (vm_id, float(mtime) if mtime else None, status)
        self.timer.tick('mtimes fetched from db ')

        deferreds = [self.update_vm(cluster, info, data, updated)
                     for info in infos]
        deferred_list = DeferredList(deferreds)

        # batch update the cache updated time for all VMs in this cluster.
        # This will set the last updated time for both VMs that were modified
        # and for those that weren't.  Even if it wasn't modified we want the
        # last updated time to be up to date.
        #
        # XXX don't bother checking to see whether this query needs to run.
        # In normal usage it will almost always need to
        def update_timestamps(result):
            print(' updated: %s out of %s' % (updated, len(infos)))
            base.update(cached=datetime.now())
            self.timer.tick('records or timestamps updated')
        deferred_list.addCallback(update_timestamps)

        deferred_list.addCallback(callback)
        # Returned so that a failure in the chain above (in which case
        # `callback` is skipped) reaches the errback of the calling Deferred
        # instead of being lost.
        return deferred_list

    def update_vm(self, cluster, info, data, updated):
        """
        updates an individual VirtualMachine: this just sets up the work in a
        deferred by using callLater.  Actual work is done in _update_vm().

        @param cluster - cluster this VM is on
        @param info - info from ganeti
        @param data - data from database
        @param updated - counter object
        @return Deferred fired by the _update_vm() call, or errbacked if that
                call raises
        """
        deferred = Deferred()

        def run():
            try:
                self._update_vm(cluster, info, data, updated,
                                deferred.callback)
            except Exception:
                # An exception raised under callLater is only logged by the
                # reactor; report it through the Deferred so the surrounding
                # DeferredList can still complete.
                if not deferred.called:
                    deferred.errback()
                else:
                    raise

        reactor.callLater(0, run)
        return deferred

    def _update_vm(self, cluster, info, data, updated, callback):
        """
        updates an individual VirtualMachine, this is the actual work function

        @param cluster - cluster this VM is on
        @param info - info from ganeti
        @param data - data from database
        @param updated - counter object
        @param callback - callback fired with the VM's primary key when the
                          method is complete.
        """
        name = info['name']
        if name in data:
            vm_id, mtime, status = data[name]
            # only update the whole object if it is new or modified.
            #
            # XXX status changes will not always be reflected in mtime
            # explicitly check status to see if it has changed.  failing
            # to check this would result in state changes being lost
            if not mtime or mtime < info['mtime'] \
                    or status != info['status']:
                print(' Virtual Machine (updated) : %s' % name)
                parsed = VirtualMachine.parse_persistent_info(info)
                VirtualMachine.objects.filter(pk=vm_id) \
                    .update(serialized_info=cPickle.dumps(info), **parsed)
                # NOTE(review): relies on Counter implementing in-place
                # addition so the caller's object is mutated - confirm
                updated += 1
        else:
            # new vm
            vm = VirtualMachine(cluster=cluster, hostname=name)
            vm.info = info
            vm.save()
            # the id was previously left unbound on this path, which raised
            # UnboundLocalError before the callback could fire
            vm_id = vm.pk

        callback(vm_id)

    def complete(self, result):
        """ callback fired when everything is complete """
        self.timer.stop()