*Fix : satellite links were sent with 'con' information. It's not a good thing...
[shinken.git] / bin / shinken-scheduler
blob90e5f98e74438adca2e1990eaf2d9820e3ecd87a
1 #!/usr/bin/env python
2 #Copyright (C) 2009-2010 :
3 # Gabes Jean, naparuba@gmail.com
4 # Gerhard Lausser, Gerhard.Lausser@consol.de
6 #This file is part of Shinken.
8 #Shinken is free software: you can redistribute it and/or modify
9 #it under the terms of the GNU Affero General Public License as published by
10 #the Free Software Foundation, either version 3 of the License, or
11 #(at your option) any later version.
13 #Shinken is distributed in the hope that it will be useful,
14 #but WITHOUT ANY WARRANTY; without even the implied warranty of
15 #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 #GNU Affero General Public License for more details.
18 #You should have received a copy of the GNU Affero General Public License
19 #along with Shinken. If not, see <http://www.gnu.org/licenses/>.
22 #For the Shinken application, I try to respect
23 #The Zen of Python, by Tim Peters. It's just some
24 #very goods ideas that make Python programming very fun
25 #and efficient. If it's good for Python, it must be good for
26 #Shinken. :)
30 #Beautiful is better than ugly.
31 #Explicit is better than implicit.
32 #Simple is better than complex.
33 #Complex is better than complicated.
34 #Flat is better than nested.
35 #Sparse is better than dense.
36 #Readability counts.
37 #Special cases aren't special enough to break the rules.
38 #Although practicality beats purity.
39 #Errors should never pass silently.
40 #Unless explicitly silenced.
41 #In the face of ambiguity, refuse the temptation to guess.
42 #There should be one-- and preferably only one --obvious way to do it.
43 #Although that way may not be obvious at first unless you're Dutch.
44 #Now is better than never.
45 #Although never is often better than *right* now.
46 #If the implementation is hard to explain, it's a bad idea.
47 #If the implementation is easy to explain, it may be a good idea.
48 #Namespaces are one honking great idea -- let's do more of those!
51 #This class is the application for scheduling.
52 #It creates the scheduling object after listening to the arbiter
53 #for a conf. It keeps listening to the arbiter even after the scheduler is launched.
54 #If a new conf is received, the scheduler is stopped
55 #and a new one is created.
56 #The scheduler creates lists of checks and actions for the pollers
57 #and reactionners.
58 import os
59 import time
60 import sys
61 import select
62 import random
63 import getopt
# We try to raise the recursion limit, but the resource module is not
# available on Windows, so the whole step is skipped there.
if os.name != 'nt':
    import resource
    # All the pickle will ask for a lot of recursion, so we must make
    # sure to set it at a high value. The maximum recursion depth depends
    # on the Python version and the process limit "stack size".
    # The factors used were acquired by testing a broad range of installations.
    # The stack size limit is requested by name: the previous code passed
    # the raw number 3 (the value of RLIMIT_STACK on Linux).
    stacksize_soft, stacksize_hard = resource.getrlimit(resource.RLIMIT_STACK)
    if sys.version_info < (2,6):
        sys.setrecursionlimit(int(stacksize_soft * 0.65 + 1100))
    elif sys.version_info < (3,):
        sys.setrecursionlimit(int(stacksize_soft * 1.9 + 3200))
    else:
        sys.setrecursionlimit(int(stacksize_soft * 2.4 + 3200))
82 ## Make sure people are using Python 2.5 or higher
83 if sys.version_info < (2,4):
84 print "Shinken requires as a minimum Python 2.4.x, sorry"
85 sys.exit(1)
86 elif sys.version_info >= (3,):
87 print "Shinken is not yet compatible with Python3k, sorry"
88 sys.exit(1)
#Try to load the shinken lib.
#Maybe it is not in our python path (an untar install, launched in a
#direct way), so we detect that case with a test import.
try:
    from shinken.util import to_bool
    _shinken_lib_found = True
except ImportError:
    _shinken_lib_found = False

_main_module = sys.modules['__main__']
if hasattr(_main_module, '__file__'):
    my_path = os.path.abspath(_main_module.__file__)
    #Path elements of the directory above the one holding this script
    elts = os.path.dirname(my_path).split(os.sep)[:-1]
    if not _shinken_lib_found:
        #The lib is not importable yet : add the parent directory (..)
        #so that the shinken package found there can be imported
        sys.path.append(os.sep.join(elts))
    #In both cases the shinken directory itself is added to the path
    elts.append('shinken')
    sys.path.append(os.sep.join(elts))
113 #DBG for Pyro 4
114 sys.path.insert(0, '.')
116 try:
117 import shinken.pyro_wrapper
118 except ImportError:
119 print "Shinken require the Python Pyro module. Please install it."
120 sys.exit(1)
122 Pyro = shinken.pyro_wrapper.Pyro
125 from shinken.scheduler import Scheduler
126 from shinken.config import Config
127 from shinken.macroresolver import MacroResolver
128 from shinken.external_command import ExternalCommandManager
129 from shinken.daemon import Daemon
130 from shinken.util import to_int, to_bool
131 from shinken.modulesmanager import ModulesManager
134 VERSION = "0.4"
138 #Interface for Workers
139 #They connect here and see if they are still OK with
140 #our running_id, if not, they must drop their checks
141 #in progress
142 class IChecks(Pyro.core.ObjBase):
143 #we keep sched link
144 #and we create a running_id so poller and
145 #reactionner know if we restart or not
146 def __init__(self, sched):
147 Pyro.core.ObjBase.__init__(self)
148 self.sched = sched
149 self.running_id = random.random()
152 #poller or reactionner is asking us our running_id
153 def get_running_id(self):
154 return self.running_id
157 #poller or reactionner ask us actions
158 def get_checks(self , do_checks=False, do_actions=False, poller_tags=[]):
159 #print "We ask us checks"
160 res = self.sched.get_to_run_checks(do_checks, do_actions, poller_tags)
161 #print "Sending %d checks" % len(res)
162 self.sched.nb_checks_send += len(res)
163 return res
166 #poller or reactionner are putting us results
167 def put_results(self, results):
168 nb_received = len(results)
169 self.sched.nb_check_received += nb_received
170 print "Received %d results" % nb_received
171 self.sched.waiting_results.extend(results)
173 #for c in results:
174 #self.sched.put_results(c)
175 return True
#Interface for Brokers.
#They connect here and get all broks (data for brokers).
#Data must be ORDERED! (initial status BEFORE update...)
class IBroks(Pyro.core.ObjBase):
    #We keep a link to the scheduler and draw a random running_id
    def __init__(self, sched):
        Pyro.core.ObjBase.__init__(self)
        self.sched = sched
        self.running_id = random.random()


    #Lets a broker find out whether it needs to void its broks
    def get_running_id(self):
        return self.running_id


    #A broker asks us for broks.
    #What the scheduler returns is counted in its nb_broks_send counter
    #and handed back to the caller.
    def get_broks(self):
        scheduler = self.sched
        broks = scheduler.get_broks()
        scheduler.nb_broks_send += len(broks)
        #We no longer have a full set of broks in the queue
        scheduler.has_full_broks = False
        return broks


    #A broker is a new one : if we do not have a full set of broks,
    #we clean our broks and fill them again with all new values.
    #Nothing is done when the full set is already there.
    def fill_initial_broks(self):
        scheduler = self.sched
        if scheduler.has_full_broks:
            return
        scheduler.broks.clear()
        scheduler.fill_initial_broks()


    #Ping? Pong! (answers None)
    def ping(self):
        return None
219 #Interface for Arbiter, our big MASTER
220 #We ask him a conf and after we listen for him.
221 #HE got user entry, so we must listen him carefully
222 #and give information he want, maybe for another scheduler
223 class IForArbiter(Pyro.core.ObjBase):
224 def __init__(self, app):
225 Pyro.core.ObjBase.__init__(self)
226 self.app = app
227 self.running_id = random.random()
229 #very useful?
230 def get_running_id(self):
231 return self.running_id
234 #use full too?
235 def get_info(self, type, ref, prop, other):
236 return self.app.sched.get_info(type, ref, prop, other)
239 #arbiter is send us a external coomand.
240 #it can send us global command, or specific ones
241 def run_external_command(self, command):
242 self.app.sched.run_external_command(command)
245 #Arbiter is sending us a new conf. We check if we do not already have it.
246 #If not, we take it, and if app has a scheduler, we ask it to die,
247 #so the new conf will be load, and a new scheduler created
248 def put_conf(self, conf_package):
249 (conf, override_conf, modules) = conf_package
250 if not self.app.have_conf or self.app.conf.magic_hash != conf.magic_hash:
251 self.app.conf = conf
252 self.app.override_conf = override_conf
253 self.app.modules = modules
254 print "Get conf:", self.app.conf
255 self.app.have_conf = True
256 print "Have conf?", self.app.have_conf
257 print "Just apres reception"
259 #if app already have a scheduler, we must say him to
260 #DIE Mouahahah
261 #So It will quit, and will load a new conf (and create a brand new scheduler)
262 if hasattr(self.app, "sched"):
263 self.app.sched.die()
266 #Arbiter want to know if we are alive
267 def ping(self):
268 return True
270 #Use by arbiter to know if we have a conf or not
271 #can be usefull if we must do nothing but
272 #we are not because it can KILL US!
273 def have_conf(self):
274 return self.app.have_conf
277 #Call by arbiter if it thinks we are running but we must do not (like
278 #if I was a spare that take a conf but the master returns, I must die
279 #and wait a new conf)
280 #Us : No please...
281 #Arbiter : I don't care, hasta la vista baby!
282 #Us : ... <- Nothing! We are die! you don't follow
283 #anything or what??
284 def wait_new_conf(self):
285 print "Arbiter want me to wait a new conf"
286 self.app.have_conf = False
287 if hasattr(self.app, "sched"):
288 self.app.sched.die()
291 #Tha main app class
292 class Shinken(Daemon):
293 #default_port = 7768
295 properties = {
296 'workdir' : {'default' : '/usr/local/shinken/var', 'pythonize' : None, 'path' : True},
297 'pidfile' : {'default' : '/usr/local/shinken/var/schedulerd.pid', 'pythonize' : None, 'path' : True},
298 'port' : {'default' : '7768', 'pythonize' : to_int},
299 'host' : {'default' : '0.0.0.0', 'pythonize' : None},
300 'user' : {'default' : 'shinken', 'pythonize' : None},
301 'group' : {'default' : 'shinken', 'pythonize' : None},
302 'idontcareaboutsecurity' : {'default' : '0', 'pythonize' : to_bool},
303 'use_ssl' : {'default' : '0', 'pythonize' : to_bool},
304 'certs_dir' : {'default' : 'etc/certs', 'pythonize' : None},
305 'ca_cert' : {'default' : 'etc/certs/ca.pem', 'pythonize' : None},
306 'server_cert' : {'default': 'etc/certs/server.pem', 'pythonize' : None},
307 'hard_ssl_name_check' : {'default' : '0', 'pythonize' : to_bool},
310 #Create the shinken class:
311 #Create a Pyro server (port = arvg 1)
312 #then create the interface for arbiter
313 #Then, it wait for a first configuration
314 def __init__(self, config_file, is_daemon, do_replace, debug, debug_file):
315 self.print_header()
317 #From daemon to manage signal. Call self.manage_signal if
318 #exists, a dummy function otherwise
319 self.set_exit_handler()
321 self.config_file = config_file
322 #Read teh config file if exist
323 #if not, default properties are used
324 self.parse_config_file()
326 if config_file != None:
327 #Some paths can be relatives. We must have a full path by taking
328 #the config file by reference
329 self.relative_paths_to_full(os.path.dirname(config_file))
331 #Check if another Scheduler is not running (with the same conf)
332 self.check_parallel_run(do_replace)
334 #If the admin don't care about security, I allow root running
335 insane = not self.idontcareaboutsecurity
337 #The module grabber part should be run BEFORE change the pwd with
338 #the daemon mode
339 print "modulemanager file", shinken.modulesmanager.__file__
340 modulespath = os.path.abspath(shinken.modulesmanager.__file__)
341 print "modulemanager absolute file", modulespath
342 #We got one of the files of
343 elts = os.path.dirname(modulespath).split(os.sep)[:-1]
344 elts.append('shinken')
345 elts.append('modules')
346 self.modulespath = os.sep.join(elts)
347 print "Using modules path : %s" % os.sep.join(elts)
350 # The SSL part
351 if self.use_ssl:
352 Pyro.config.PYROSSL_CERTDIR = os.path.abspath(self.certs_dir)
353 print "Using ssl certificate directory : %s" % Pyro.config.PYROSSL_CERTDIR
354 Pyro.config.PYROSSL_CA_CERT = os.path.abspath(self.ca_cert)
355 print "Using ssl ca cert file : %s" % Pyro.config.PYROSSL_CA_CERT
356 Pyro.config.PYROSSL_CERT = os.path.abspath(self.server_cert)
357 print"Using ssl server cert file : %s" % Pyro.config.PYROSSL_CERT
358 if self.hard_ssl_name_check:
359 Pyro.config.PYROSSL_POSTCONNCHECK=1
360 else:
361 Pyro.config.PYROSSL_POSTCONNCHECK=0
363 #Try to change the user (not nt for the moment)
364 #TODO: change user on nt
365 if os.name != 'nt':
366 self.change_user(insane)
367 else:
368 print "Sorry, you can't change user on this system"
370 #Now the daemon part if need
371 if is_daemon:
372 self.create_daemon(do_debug=debug, debug_file=debug_file)
375 #TODO : signal managment
376 #atexit.register(unlink, pidfile=conf['pidfile'])
378 #Config Class must be filled with USERN Macro
379 Config.fill_usern_macros()
381 #create the server
382 print "Using working directory : %s" % os.path.abspath(self.workdir)
383 Pyro.config.PYRO_STORAGE = self.workdir
384 Pyro.config.PYRO_COMPRESSION = 1
385 Pyro.config.PYRO_MULTITHREADED = 0
389 self.poller_daemon = shinken.pyro_wrapper.init_daemon(self.host, self.port, self.use_ssl)
391 print "Listening on:", self.host, ":", self.port
393 #Now the interface
394 i_for_arbiter = IForArbiter(self)
396 self.uri2 = shinken.pyro_wrapper.register(self.poller_daemon, i_for_arbiter, "ForArbiter")
398 print "The Arbiter Interface is at:", self.uri2
400 #Ok, now the conf
401 self.must_run = True
402 self.wait_initial_conf()
403 print "Ok we've got conf"
405 #Interface for Broks and Checks
406 self.ichecks = None
407 self.ibroks = None
410 #Manage signal function
411 #Frame is just garbage
412 def manage_signal(self, sig, frame):
413 print "\nExiting with signal", sig
414 if hasattr(self, 'sched'):
415 print "Asking for a retention save"
416 self.sched.update_retention_file(forced=True)
417 print "Stopping all network connexions"
418 self.poller_daemon.shutdown(True)
419 print "Unlinking pid file"
420 try:
421 os.unlink(self.pidfile)
422 except OSError, exp:
423 print "Error un deleting pid file:", exp
424 sys.exit(0)
427 #We wait (block) for arbiter to send us conf
428 def wait_initial_conf(self):
430 self.have_conf = False
431 print "Waiting for initial configuration"
432 timeout = 1.0
433 while not self.have_conf :
434 socks = shinken.pyro_wrapper.get_sockets(self.poller_daemon)
436 avant = time.time()
437 # 'foreign' event loop
438 ins, outs, exs = select.select(socks, [], [], timeout)
439 if ins != []:
440 for s in socks:
441 if s in ins:
442 #Cal the wrapper to manage the good
443 #handleRequests call of daemon
444 shinken.pyro_wrapper.handleRequests(self.poller_daemon, s)
445 apres = time.time()
446 diff = apres-avant
447 timeout = timeout - diff
448 break # no need to continue with the for loop
449 else: #Timeout
450 sys.stdout.write(".")
451 sys.stdout.flush()
452 timeout = 1.0
454 if timeout < 0:
455 timeout = 1.0
458 #Load and init all modules we've got
459 def load_modules(self):
460 self.modules_manager = ModulesManager('scheduler', self.modulespath, self.modules)
461 self.modules_manager.load()
462 self.mod_instances = self.modules_manager.get_instances()
466 #OK, we've got the conf, now we load it
467 #and launch scheduler with it
468 #we also create interface for poller and reactionner
469 def load_conf(self):
470 #First mix conf and override_conf to have our definitive conf
471 for prop in self.override_conf:
472 print "Overriding the property %s with value %s" % (prop, self.override_conf[prop])
473 val = self.override_conf[prop]
474 setattr(self.conf, prop, val)
477 if self.conf.use_timezone != 'NOTSET':
478 print "Setting our timezone to", self.conf.use_timezone
479 os.environ['TZ'] = self.conf.use_timezone
480 time.tzset()
482 print "I've got modules", self.modules
483 self.load_modules()
485 #create scheduler with ref of our daemon
486 self.sched = Scheduler(self.poller_daemon)
488 #give it an interface
489 #But first remove previous interface if exists
490 if self.ichecks != None:
491 print "Deconnecting previous Check Interface from daemon"
492 shinken.pyro_wrapper.unregister(self.poller_daemon, self.ichecks)
494 #Now create and connect it
495 self.ichecks = IChecks(self.sched)
496 self.uri = shinken.pyro_wrapper.register(self.poller_daemon, self.ichecks, "Checks")
497 print "The Checks Interface uri is:", self.uri
499 #Same for Broks
500 if self.ibroks != None:
501 print "Deconnecting previous Broks Interface from daemon"
502 shinken.pyro_wrapper.unregister(self.poller_daemon, self.ibroks)
504 #Create and connect it
505 self.ibroks = IBroks(self.sched)
506 self.uri2 = shinken.pyro_wrapper.register(self.poller_daemon, self.ibroks, "Broks")
507 print "The Broks Interface uri is:", self.uri2
509 print "Loading configuration"
510 self.conf.explode_global_conf()
511 #we give sched it's conf
512 self.sched.load_conf(self.conf)
514 self.sched.load_modules(self.modules_manager, self.mod_instances)
516 #We must update our Config dict macro with good value
517 #from the config parameters
518 self.sched.conf.fill_resource_macros_names_macros()
521 #Creating the Macroresolver Class & unique instance
522 m = MacroResolver()
523 m.init(self.conf)
525 #self.conf.dump()
526 #self.conf.quick_debug()
528 #Now create the external commander
529 #it's a applyer : it role is not to dispatch commands,
530 #but to apply them
531 e = ExternalCommandManager(self.conf, 'applyer')
533 #Scheduler need to know about external command to
534 #activate it if necessery
535 self.sched.load_external_command(e)
537 #External command need the sched because he can raise checks
538 e.load_scheduler(self.sched)
541 #our main function, launch after the init
542 def main(self):
543 #ok, if we are here, we've got the conf
544 self.load_conf()
546 print "Configuration Loaded"
547 while self.must_run:
548 self.sched.run()
549 #Ok, we quit scheduler, but maybe it's just for
550 #reloading our configuration
551 if self.must_run:
552 if self.have_conf:
553 self.load_conf()
554 else:
555 self.wait_initial_conf()
556 self.load_conf()
559 ################### Process launch part
560 def usage(name):
561 print "Shinken Scheduler Daemon, version %s, from :" % VERSION
562 print " Gabes Jean, naparuba@gmail.com"
563 print " Gerhard Lausser, Gerhard.Lausser@consol.de"
564 print "Usage: %s [options] [-c configfile]" % name
565 print "Options:"
566 print " -c, --config"
567 print "\tConfig file."
568 print " -d, --daemon"
569 print "\tRun in daemon mode"
570 print " -r, --replace"
571 print "\tReplace previous running scheduler"
572 print " -h, --help"
573 print "\tPrint detailed help screen"
574 print " --debug"
575 print "\tDebug File. Default : no use (why debug a bug free program? :) )"
579 #if __name__ == '__main__':
580 # p = Shinken()
581 # import cProfile
582 # #p.main()
583 # command = """p.main()"""
584 # cProfile.runctx( command, globals(), locals(), filename="var/Shinken.profile" )
592 #Here we go!
593 if __name__ == "__main__":
594 #Manage the options
595 try:
596 opts, args = getopt.getopt(sys.argv[1:], "hrdc::w", ["help", "replace", "daemon", "config=", "debug=", "easter"])
597 except getopt.GetoptError, err:
598 # print help information and exit:
599 print str(err) # will print something like "option -a not recognized"
600 usage(sys.argv[0])
601 sys.exit(2)
602 #Default params
603 config_file = None
604 is_daemon = False
605 do_replace = False
606 debug = False
607 debug_file = None
608 for o, a in opts:
609 if o in ("-h", "--help"):
610 usage(sys.argv[0])
611 sys.exit()
612 elif o in ("-r", "--replace"):
613 do_replace = True
614 elif o in ("-c", "--config"):
615 config_file = a
616 elif o in ("-d", "--daemon"):
617 is_daemon = True
618 elif o in ("--debug"):
619 debug = True
620 debug_file = a
621 else:
622 print "Sorry, the option", o, a, "is unknown"
623 usage(sys.argv[0])
624 sys.exit()
626 p = Shinken(config_file, is_daemon, do_replace, debug, debug_file)
627 #Ok, now we load the config
629 #p = Shinken(conf)
630 #import cProfile
631 p.main()
632 #command = """p.main()"""
633 #cProfile.runctx( command, globals(), locals(), filename="/tmp/scheduler.profile" )