Merge branch 'master' of ssh://lausser,shinken@shinken.git.sourceforge.net/gitroot...
[shinken.git] / shinken / host.py
blob9450211089baa51c17aae86608eee2fa425d5d36
1 #!/usr/bin/env python
2 #Copyright (C) 2009-2010 :
3 # Gabes Jean, naparuba@gmail.com
4 # Gerhard Lausser, Gerhard.Lausser@consol.de
6 #This file is part of Shinken.
8 #Shinken is free software: you can redistribute it and/or modify
9 #it under the terms of the GNU Affero General Public License as published by
10 #the Free Software Foundation, either version 3 of the License, or
11 #(at your option) any later version.
13 #Shinken is distributed in the hope that it will be useful,
14 #but WITHOUT ANY WARRANTY; without even the implied warranty of
15 #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 #GNU Affero General Public License for more details.
18 #You should have received a copy of the GNU Affero General Public License
19 #along with Shinken. If not, see <http://www.gnu.org/licenses/>.
21 """ This is the main class for the Host. In fact it's mainly
22 about the configuration part. for the running one, it's better
23 to look at the schedulingitem class that manage all
24 scheduling/consome check smart things :)
25 """
27 import time
28 import re #for keys generator
30 from shinken.autoslots import AutoSlots
31 from shinken.item import Items
32 from shinken.schedulingitem import SchedulingItem
33 from shinken.util import to_int, to_float, to_char, to_split, to_bool, format_t_into_dhms_format, to_hostnames_list, get_obj_name, to_svc_hst_distinct_lists, to_list_string_of_names, expand_with_macros
34 from shinken.property import UnusedProp, BoolProp, IntegerProp, FloatProp, CharProp, StringProp, ListProp
35 from shinken.graph import Graph
36 from shinken.macroresolver import MacroResolver
37 from shinken.eventhandler import EventHandler
38 from shinken.log import logger
40 class Host(SchedulingItem):
# AutoSlots creates the __slots__ from the names found in
# properties and running_properties
__metaclass__ = AutoSlots

id = 1 # 0 is reserved for host (primary node for parents)
ok_up = 'UP'
my_type = 'host'
# Properties defined by the configuration. Each entry can set:
#  *required : the property is required in the configuration
#  *default : default value if not set in the configuration
#  *pythonize : function to call when transforming the string into a python object
#  *fill_brok : if set, send to the broker. There are two categories: full_status for initial and update status, check_result for check results
#  *no_slots : do not take this property for __slots__
# Only for the initial call:
#  conf_send_preparation : if set, the property is passed to this function. It is used to "flatten"
#   some dangerous properties like realms that are too 'linked' to be sent like that.
#  brok_transformation : if set, the function is called with the value of the property.
#   Most of the time it is used to flatten the data (like realm_name instead of the realm object).
properties={
    'host_name': StringProp(fill_brok=['full_status', 'check_result', 'next_schedule']),
    'alias': StringProp(fill_brok=['full_status']),
    'display_name': StringProp(
        default='none',
        fill_brok=['full_status']),
    'address': StringProp(fill_brok=['full_status']),
    'parents': ListProp(
        brok_transformation=to_hostnames_list,
        default='',
        fill_brok=['full_status']),
    'hostgroups': StringProp(
        brok_transformation=to_list_string_of_names,
        default='',
        fill_brok=['full_status']),
    'check_command': StringProp(
        default='_internal_host_up',
        fill_brok=['full_status']),
    'initial_state': CharProp(
        default='u',
        fill_brok=['full_status']),
    'max_check_attempts': IntegerProp(fill_brok=['full_status']),
    'check_interval': IntegerProp(
        default='0',
        fill_brok=['full_status']),
    'retry_interval': IntegerProp(
        default='0',
        fill_brok=['full_status']),
    'active_checks_enabled': BoolProp(
        default='1',
        fill_brok=['full_status']),
    'passive_checks_enabled': BoolProp(
        default='1',
        fill_brok=['full_status']),
    'check_period': StringProp(fill_brok=['full_status']),
    'obsess_over_host': BoolProp(
        default='0',
        fill_brok=['full_status']),
    'check_freshness': BoolProp(
        default='0',
        fill_brok=['full_status']),
    'freshness_threshold': IntegerProp(
        default='0',
        fill_brok=['full_status']),
    'event_handler': StringProp(
        default='',
        fill_brok=['full_status']),
    'event_handler_enabled': BoolProp(
        default='0',
        fill_brok=['full_status']),
    'low_flap_threshold': IntegerProp(
        default='25',
        fill_brok=['full_status']),
    'high_flap_threshold': IntegerProp(
        default='50',
        fill_brok=['full_status']),
    'flap_detection_enabled': BoolProp(
        default='1',
        fill_brok=['full_status']),
    'flap_detection_options': ListProp(
        default='o,d,u',
        fill_brok=['full_status']),
    'process_perf_data': BoolProp(
        default='1',
        fill_brok=['full_status']),
    'retain_status_information': BoolProp(
        default='1',
        fill_brok=['full_status']),
    'retain_nonstatus_information': BoolProp(
        default='1',
        fill_brok=['full_status']),
    'contacts': StringProp(
        default='',
        fill_brok=['full_status']),
    'contact_groups': StringProp(
        default='',
        fill_brok=['full_status']),
    'notification_interval': IntegerProp(
        default='60',
        fill_brok=['full_status']),
    'first_notification_delay': IntegerProp(
        default='0',
        fill_brok=['full_status']),
    'notification_period': StringProp(fill_brok=['full_status']),
    'notification_options': ListProp(
        default='d,u,r,f',
        fill_brok=['full_status']),
    'notifications_enabled': BoolProp(
        default='1',
        fill_brok=['full_status']),
    'stalking_options': ListProp(
        default='',
        fill_brok=['full_status']),
    'notes': StringProp(
        default='',
        fill_brok=['full_status']),
    'notes_url': StringProp(
        default='',
        fill_brok=['full_status']),
    'action_url': StringProp(
        default='',
        fill_brok=['full_status']),
    'icon_image': StringProp(
        default='',
        fill_brok=['full_status']),
    'icon_image_alt': StringProp(
        default='',
        fill_brok=['full_status']),
    'vrml_image': StringProp(
        default='',
        fill_brok=['full_status']),
    'statusmap_image': StringProp(
        default='',
        fill_brok=['full_status']),

    # No slots for these two: a name that begins with a digit
    # is not usable as a slot name
    '2d_coords': StringProp(
        default='',
        fill_brok=['full_status'],
        no_slots=True),
    '3d_coords': StringProp(
        default='',
        fill_brok=['full_status'],
        no_slots=True),
    'failure_prediction_enabled': BoolProp(
        default='0',
        fill_brok=['full_status']),

    ### New to shinken
    # 'fill_brok' is ok because in the scheduler it is already
    # a string, thanks to conf_send_preparation
    'realm': StringProp(
        default=None,
        fill_brok=['full_status'],
        conf_send_preparation=get_obj_name),
    'poller_tag': StringProp(default=None),

    'resultmodulations': StringProp(default=''),
    'escalations': StringProp(
        default='',
        fill_brok=['full_status']),
    'maintenance_period': StringProp(
        default='',
        fill_brok=['full_status']),

    # Criticity value
    'criticity': IntegerProp(
        default='3',
        fill_brok=['full_status']),
}
# Properties set only for running purposes
#  retention : save/load this property from retention
# NOTE(review): defaults such as time.time(), [] or {} are evaluated once,
# when this class body runs, not once per host -- confirm how the
# defaults are applied to instances.
running_properties = {
    'last_chk': IntegerProp(
        default=0,
        fill_brok=['full_status', 'check_result'],
        retention=True),
    'next_chk': IntegerProp(
        default=0,
        fill_brok=['full_status', 'next_schedule']),
    'in_checking': BoolProp(
        default=False,
        fill_brok=['full_status', 'check_result', 'next_schedule']),
    'latency': FloatProp(
        default=0,
        fill_brok=['full_status', 'check_result'],
        retention=True),
    'attempt': IntegerProp(
        default=0,
        fill_brok=['full_status', 'check_result'],
        retention=True),
    'state': StringProp(
        default='PENDING',
        fill_brok=['full_status'],
        retention=True),
    'state_id': IntegerProp(
        default=0,
        fill_brok=['full_status', 'check_result'],
        retention=True),
    'state_type': StringProp(
        default='HARD',
        fill_brok=['full_status'],
        retention=True),
    'state_type_id': IntegerProp(
        default=0,
        fill_brok=['full_status', 'check_result'],
        retention=True),
    'current_event_id': StringProp(
        default=0,
        fill_brok=['full_status', 'check_result'],
        retention=True),
    'last_event_id': IntegerProp(
        default=0,
        fill_brok=['full_status', 'check_result'],
        retention=True),
    'last_state': StringProp(
        default='PENDING',
        fill_brok=['full_status'],
        retention=True),
    'last_state_id': IntegerProp(
        default=0,
        fill_brok=['full_status'],
        retention=True),
    'last_state_change': FloatProp(
        default=time.time(),
        fill_brok=['full_status'],
        retention=True),
    'last_hard_state_change': FloatProp(
        default=time.time(),
        fill_brok=['full_status'],
        retention=True),
    'last_hard_state': StringProp(
        default='PENDING',
        fill_brok=['full_status'],
        retention=True),
    'last_hard_state_id' : IntegerProp(
        default=0,
        fill_brok=['full_status'],
        retention=True),
    'last_time_up': IntegerProp(
        default=int(time.time()),
        fill_brok=['full_status', 'check_result'],
        retention=True),
    'last_time_down': IntegerProp(
        default=int(time.time()),
        fill_brok=['full_status', 'check_result'],
        retention=True),
    'last_time_unreachable': IntegerProp(
        default=int(time.time()),
        fill_brok=['full_status', 'check_result'],
        retention=True),
    'duration_sec': IntegerProp(
        default=0,
        fill_brok=['full_status'],
        retention=True),
    'output': StringProp(
        default='',
        fill_brok=['full_status', 'check_result'],
        retention=True),
    'long_output': StringProp(
        default='',
        fill_brok=['full_status', 'check_result'],
        retention=True),
    'is_flapping': BoolProp(
        default=False,
        fill_brok=['full_status'],
        retention=True),
    'flapping_comment_id': IntegerProp(
        default=0,
        fill_brok=['full_status'],
        retention=True),
    # No broks for the *_depend_of properties: too many links to hosts/services
    # Dependencies for actions like notifications or event handlers, so AFTER the check returns
    'act_depend_of': StringProp(default=[]),

    # Dependencies for raising checks, so BEFORE the checks
    'chk_depend_of': StringProp(default=[]),

    # Elements that depend on me, so the reverse of the ones just above
    'act_depend_of_me': StringProp(default=[]),

    # Elements that depend on me
    'chk_depend_of_me': StringProp(default=[]),

    'last_state_update': StringProp(
        default=time.time(),
        fill_brok=['full_status'],
        retention=True),

    # No brok, too many links
    'services': StringProp(default=[]),

    # No broks, it is just internal, and checks have too many links
    'checks_in_progress': StringProp(default=[]),

    # No broks, it is just internal, and checks have too many links
    'notifications_in_progress': StringProp(
        default={},
        retention=True),
    'downtimes': StringProp(
        default=[],
        fill_brok=['full_status'],
        retention=True),
    'comments': StringProp(
        default=[],
        fill_brok=['full_status'],
        retention=True),
    'flapping_changes': StringProp(
        default=[],
        fill_brok=['full_status'],
        retention=True),
    'percent_state_change': FloatProp(
        default=0.0,
        fill_brok=['full_status'],
        retention=True),
    'problem_has_been_acknowledged': BoolProp(
        default=False,
        fill_brok=['full_status'],
        retention=True),
    'acknowledgement': StringProp(
        default=None,
        retention=True),
    'acknowledgement_type': IntegerProp(
        default=1,
        fill_brok=['full_status', 'check_result'],
        retention=True),
    'check_type': IntegerProp(
        default=0,
        fill_brok=['full_status', 'check_result'],
        retention=True),
    'has_been_checked': IntegerProp(
        default=0,
        fill_brok=['full_status', 'check_result'],
        retention=True),
    'should_be_scheduled': IntegerProp(
        default=1,
        fill_brok=['full_status'],
        retention=True),
    'last_problem_id': IntegerProp(
        default=0,
        fill_brok=['full_status', 'check_result'],
        retention=True),
    'current_problem_id': IntegerProp(
        default=0,
        fill_brok=['full_status', 'check_result'],
        retention=True),
    'execution_time': FloatProp(
        default=0.0,
        fill_brok=['full_status', 'check_result'],
        retention=True),
    'last_notification': FloatProp(
        default=time.time(),
        fill_brok=['full_status'],
        retention=True),
    'current_notification_number': IntegerProp(
        default=0,
        fill_brok=['full_status'],
        retention=True),
    'current_notification_id': IntegerProp(
        default=0,
        fill_brok=['full_status'],
        retention=True),
    'check_flapping_recovery_notification': BoolProp(
        default=True,
        fill_brok=['full_status'],
        retention=True),
    'scheduled_downtime_depth': IntegerProp(
        default=0,
        fill_brok=['full_status'],
        retention=True),
    'pending_flex_downtime': IntegerProp(
        default=0,
        fill_brok=['full_status'],
        retention=True),
    'timeout': IntegerProp(
        default=0,
        fill_brok=['full_status', 'check_result'],
        retention=True),
    'start_time': IntegerProp(
        default=0,
        fill_brok=['full_status', 'check_result'],
        retention=True),
    'end_time': IntegerProp(
        default=0,
        fill_brok=['full_status', 'check_result'],
        retention=True),
    'early_timeout': IntegerProp(
        default=0,
        fill_brok=['full_status', 'check_result'],
        retention=True),
    'return_code': IntegerProp(
        default=0,
        fill_brok=['full_status', 'check_result'],
        retention=True),
    'perf_data': StringProp(
        default='',
        fill_brok=['full_status', 'check_result'],
        retention=True),
    'last_perf_data': StringProp(
        default='',
        retention=True),
    'customs': StringProp(default={}, fill_brok=['full_status']),

    'got_default_realm' : BoolProp(default=False),

    # Used to keep all the contacts we have notified
    'notified_contacts': StringProp(
        default=set()),

    'in_scheduled_downtime': BoolProp(
        default=False,
        retention=True),
    'in_scheduled_downtime_during_last_check': BoolProp(
        default=False,
        retention=True),

    # Checks and notifications raised are put here
    'actions': StringProp(
        default=[]),
    # and the broks raised are put here
    'broks': StringProp(
        default=[]),
    'childs': StringProp(
        brok_transformation=to_hostnames_list,
        default=[],
        fill_brok=['full_status']),

    # All errors and warnings raised during the configuration parsing,
    # that will raise real warnings/errors during is_correct
    'configuration_warnings': StringProp(default=[]),
    'configuration_errors': StringProp(default=[]),

    ### Problem/impact part
    'is_problem': StringProp(
        default=False,
        fill_brok=['full_status']),
    'is_impact': StringProp(
        default=False,
        fill_brok=['full_status']),
    # The saved value of our criticity for "problems"
    'my_own_criticity': IntegerProp(default=-1),
    # List of the problems that make us an impact
    'source_problems': StringProp(
        brok_transformation=to_svc_hst_distinct_lists,
        default=[],
        fill_brok=['full_status']),
    # List of the impacts I am the cause of
    'impacts': StringProp(
        brok_transformation=to_svc_hst_distinct_lists,
        default=[],
        fill_brok=['full_status']),

    # Keep a trace of the old state before being an impact
    'state_before_impact': StringProp(default='PENDING'),
    # Keep a trace of the old state id before being an impact
    'state_id_before_impact': StringProp(default=0),
    # If the state changes, we know it, so we do not revert it
    'state_changed_since_impact': StringProp(default=False),

    # BUSINESS CORRELATOR PART
    # Say if we are based on a business rule or not
    'got_business_rule' : BoolProp(default=False),
    # Our dependency node for the business rule
    'business_rule' : StringProp(default=None),
}
# Host macros and the property that gives the information.
# The property can be callable or not.
macros = {'HOSTNAME' : 'host_name',
    'HOSTDISPLAYNAME' : 'display_name',
    'HOSTALIAS' : 'alias',
    'HOSTADDRESS' : 'address',
    'HOSTSTATE' : 'state',
    'HOSTSTATEID' : 'state_id',
    'LASTHOSTSTATE' : 'last_state',
    'LASTHOSTSTATEID' : 'last_state_id',
    'HOSTSTATETYPE' : 'state_type',
    'HOSTATTEMPT' : 'attempt',
    'MAXHOSTATTEMPTS' : 'max_check_attempts',
    'HOSTEVENTID' : 'current_event_id',
    'LASTHOSTEVENTID' : 'last_event_id',
    'HOSTPROBLEMID' : 'current_problem_id',
    'LASTHOSTPROBLEMID' : 'last_problem_id',
    'HOSTLATENCY' : 'latency',
    'HOSTEXECUTIONTIME' : 'execution_time',
    'HOSTDURATION' : 'get_duration',
    'HOSTDURATIONSEC' : 'get_duration_sec',
    'HOSTDOWNTIME' : 'get_downtime',
    'HOSTPERCENTCHANGE' : 'percent_state_change',
    'HOSTGROUPNAME' : 'get_groupname',
    'HOSTGROUPNAMES' : 'get_groupnames',
    'LASTHOSTCHECK' : 'last_chk',
    'LASTHOSTSTATECHANGE' : 'last_state_change',
    'LASTHOSTUP' : 'last_time_up',
    'LASTHOSTDOWN' : 'last_time_down',
    'LASTHOSTUNREACHABLE' : 'last_time_unreachable',
    'HOSTOUTPUT' : 'output',
    'LONGHOSTOUTPUT' : 'long_output',
    'HOSTPERFDATA' : 'perf_data',
    'LASTHOSTPERFDATA' : 'last_perf_data',
    'HOSTCHECKCOMMAND' : 'get_check_command',
    # The three ack author macros all resolve through get_ack_author_name
    'HOSTACKAUTHOR' : 'get_ack_author_name',
    'HOSTACKAUTHORNAME' : 'get_ack_author_name',
    'HOSTACKAUTHORALIAS' : 'get_ack_author_name',
    'HOSTACKCOMMENT' : 'get_ack_comment',
    'HOSTACTIONURL' : 'action_url',
    'HOSTNOTESURL' : 'notes_url',
    'HOSTNOTES' : 'notes',
    'TOTALHOSTSERVICES' : 'get_total_services',
    'TOTALHOSTSERVICESOK' : 'get_total_services_ok',
    'TOTALHOSTSERVICESWARNING' : 'get_total_services_warning',
    'TOTALHOSTSERVICESUNKNOWN' : 'get_total_services_unknown',
    'TOTALHOSTSERVICESCRITICAL' : 'get_total_services_critical'
}
# This table is used to transform old parameter names into new ones,
# so from the Nagios2 format to the Nagios3 one
old_properties = {
    'normal_check_interval' : 'check_interval',
    'retry_check_interval' : 'retry_interval'
}
def clean(self):
    """No-op: this implementation does nothing and returns None."""
# Called by pickle to turn the host into plain data.
# We build a dict because lists are too dangerous for
# retention save and co :( even if it is more
# expensive.
# The __setstate__ function does the inverse.
577 def __getstate__(self):
578 cls = self.__class__
579 # id is not in *_properties
580 res = {'id' : self.id}
581 for prop in cls.properties:
582 if hasattr(self, prop):
583 res[prop] = getattr(self, prop)
584 for prop in cls.running_properties:
585 if hasattr(self, prop):
586 res[prop] = getattr(self, prop)
587 return res
# Inverse function of __getstate__
591 def __setstate__(self, state):
592 cls = self.__class__
593 self.id = state['id']
594 for prop in cls.properties:
595 if prop in state:
596 setattr(self, prop, state[prop])
597 for prop in cls.running_properties:
598 if prop in state:
599 setattr(self, prop, state[prop])
# Fill address and alias with host_name if they are not already set
def fill_predictive_missing_parameters(self):
    """Default ``address`` and ``alias`` to ``host_name`` when they are unset.

    Nothing is changed when the host has no ``host_name`` attribute.
    """
    if not hasattr(self, 'host_name'):
        return
    for missing in ('address', 'alias'):
        if not hasattr(self, missing):
            setattr(self, missing, self.host_name)
612 # Check is required prop are set:
613 # contacts OR contactgroups is need
def is_correct(self):
    """Check the host configuration and log every problem found.

    Required properties must be present, the errors collected in
    ``configuration_errors`` are reported, and the special cases (contacts,
    check command, notification interval, check period, realm, characters
    of the host name) are validated one by one. As a side effect
    ``check_period`` is set to None when the attribute does not exist.

    Returns True when nothing wrong was found, False otherwise.
    """
    cls = self.__class__
    valid = True

    # These properties get their own dedicated checks below
    handled_apart = ('contacts', 'contact_groups', 'check_period',
                     'notification_interval')
    for name in cls.properties:
        if name in handled_apart:
            continue
        if not hasattr(self, name) and cls.properties[name].required:
            logger.log("%s : I do not have %s" % (self.get_name(), name))
            valid = False

    # Report every error seen earlier, like unknown contacts and co
    if self.configuration_errors != []:
        valid = False
        for message in self.configuration_errors:
            logger.log(message)

    # Now the special cases
    if not (hasattr(self, 'contacts') or hasattr(self, 'contact_groups')) and self.notifications_enabled == True:
        logger.log("%s : I do not have contacts nor contact_groups" % self.get_name())
        valid = False

    if not hasattr(self, 'check_command') or self.check_command == None:
        logger.log("%s : I've got no check_command" % self.get_name())
        valid = False
    elif not self.check_command.is_valid():
        # There is a command, but it is invalid
        logger.log("%s : my check_command %s is invalid" % (self.get_name(), self.check_command.command))
        valid = False

    if not hasattr(self, 'notification_interval') and self.notifications_enabled == True:
        logger.log("%s : I've got no notification_interval but I've got notifications enabled" % self.get_name())
        valid = False

    # Active checks with a check_interval != 0 must come with a check_period
    if ((hasattr(self, 'active_checks_enabled') and self.active_checks_enabled)
            and (not hasattr(self, 'check_period') or self.check_period == None)
            and (hasattr(self, 'check_interval') and self.check_interval!=0)):
        logger.log("%s : My check_period is not correct" % self.get_name())
        valid = False

    if not hasattr(self, 'realm') or self.realm == None:
        logger.log("%s : My realm is not correct" % self.get_name())
        valid = False

    # Make sure the attribute exists from now on
    if not hasattr(self, 'check_period'):
        self.check_period = None

    if hasattr(self, 'host_name'):
        for char in cls.illegal_object_name_chars:
            if char in self.host_name:
                logger.log("%s : My host_name got the caracter %s that is not allowed." % (self.get_name(), char))
                valid = False
    return valid
664 # Search in my service if I've got the service
def find_service_by_name(self, service_description):
    """Return the first of our services with that description, or None."""
    matching = (svc for svc in self.services
                if svc.service_description == service_description)
    return next(matching, None)
672 # Macro part
def get_total_services(self):
    """Return the number of services of this host, as a string."""
    return '%d' % len(self.services)
def get_total_services_ok(self):
    """Return, as a string, how many of our services have state_id 0."""
    ok_count = sum(1 for svc in self.services if svc.state_id == 0)
    return str(ok_count)
def get_total_services_warning(self):
    """Return, as a string, how many of our services have state_id 1."""
    warning_count = sum(1 for svc in self.services if svc.state_id == 1)
    return str(warning_count)
def get_total_services_critical(self):
    """Return, as a string, how many of our services have state_id 2."""
    critical_count = sum(1 for svc in self.services if svc.state_id == 2)
    return str(critical_count)
def get_total_services_unknown(self):
    """Return, as a string, how many of our services have state_id 3."""
    unknown_count = sum(1 for svc in self.services if svc.state_id == 3)
    return str(unknown_count)
def get_ack_author_name(self):
    """Return the author of the current acknowledgement.

    Returns an empty string when there is no acknowledgement.
    """
    # Identity test: an '== None' comparison would go through the
    # acknowledgement object's own __eq__
    if self.acknowledgement is None:
        return ''
    return self.acknowledgement.author
def get_ack_comment(self):
    """Return the comment of the current acknowledgement.

    Returns an empty string when there is no acknowledgement.
    """
    # Identity test: an '== None' comparison would go through the
    # acknowledgement object's own __eq__
    if self.acknowledgement is None:
        return ''
    return self.acknowledgement.comment
def get_check_command(self):
    """Return the name of our check command, as given by its get_name()."""
    command = self.check_command
    return command.get_name()
709 # For get a nice name
def get_name(self):
    """Return ``name`` for a template, ``host_name`` otherwise."""
    return self.name if self.is_tpl() else self.host_name
717 # For debugin purpose only
def get_dbg_name(self):
    """Return ``host_name``; meant for debugging output only."""
    return getattr(self, 'host_name')
722 # Add a dependancy for action event handler, notification, etc)
723 # and add ourself in it's dep list
def add_host_act_dependancy(self, h, status, timeperiod, inherits_parent):
    """Register an action dependency between this host and ``h``.

    A 'logic_dep' entry pointing to ``h`` is appended to our
    ``act_depend_of`` list, and the mirror entry pointing to us is appended
    to ``h.act_depend_of_me``.
    """
    kind = 'logic_dep'
    # h goes in MY list...
    self.act_depend_of.append((h, status, kind, timeperiod, inherits_parent))
    # ...and I go in its list
    h.act_depend_of_me.append((self, status, kind, timeperiod, inherits_parent))
# Register a dependency for actions (notifications etc.)
# that is based on a BUSINESS rule. For example:
# the ERP depends on the database, so we only fill database.act_depend_of_me
# because we still want the ERP mails to go out! So call this
# on the database element, with h being the ERP element
def add_business_rule_act_dependancy(self, h, status, timeperiod, inherits_parent):
    """Record ``h`` in our ``act_depend_of_me`` list as a 'business_dep'.

    Only this side is registered: ``h`` does not get an entry in its own
    ``act_depend_of`` list.
    """
    entry = (h, status, 'business_dep', timeperiod, inherits_parent)
    self.act_depend_of_me.append(entry)
747 # Add a dependancy for check (so before launch)
def add_host_chk_dependancy(self, h, status, timeperiod, inherits_parent):
    """Register a check dependency between this host and ``h``.

    A 'logic_dep' entry pointing to ``h`` is appended to our
    ``chk_depend_of`` list, and the mirror entry pointing to us is appended
    to ``h.chk_depend_of_me``.
    """
    kind = 'logic_dep'
    # h goes in MY list...
    self.chk_depend_of.append((h, status, kind, timeperiod, inherits_parent))
    # ...and I go in its list
    h.chk_depend_of_me.append((self, status, kind, timeperiod, inherits_parent))
755 # Add one of our service to services (at linkify)
def add_service_link(self, service):
    """Append ``service`` to our ``services`` list."""
    linked = self.services
    linked.append(service)
# Set unreachable: all our parents are down!
# It is a special state: the state was already set, we just need to
# update it. We are not DOWN, we are UNREACHABLE and
# our state id is 2
def set_unreachable(self):
    """Switch the host to UNREACHABLE (state id 2).

    ``last_time_unreachable`` is set to the current time, as an integer.
    """
    stamp = int(time.time())
    self.state_id = 2
    self.state = 'UNREACHABLE'
    self.last_time_unreachable = stamp
771 # We just go an impact, so we go unreachable
772 # But only if we enable this stte change in the conf
def set_impact_state(self):
    """Become UNREACHABLE (state id 2) because we are an impact.

    Nothing happens unless the class flag
    ``enable_problem_impacts_states_change`` is set. The current state and
    state id are saved first, and ``state_changed_since_impact`` is reset
    to False.
    """
    if not self.__class__.enable_problem_impacts_states_change:
        return
    # Remember the old state (the problem may go away before a new check)
    self.state_before_impact = self.state
    self.state_id_before_impact = self.state_id
    # This flag tells if the impact state got overridden since
    self.state_changed_since_impact = False
    self.state = 'UNREACHABLE'
    self.state_id = 2
786 # Ok, we are no more an impact, if no news checks
787 # overide the impact state, we came back to old
788 # states
789 # And only if impact state change is set in configuration
def unset_impact_state(self):
    """Go back to the state saved by ``set_impact_state``.

    Nothing happens when the class flag
    ``enable_problem_impacts_states_change`` is off, or when
    ``state_changed_since_impact`` is set.
    """
    if not self.__class__.enable_problem_impacts_states_change:
        return
    if self.state_changed_since_impact:
        return
    self.state = self.state_before_impact
    self.state_id = self.state_id_before_impact
797 # set the state in UP, DOWN, or UNDETERMINED
798 # with the status of a check. Also update last_state
def set_state_from_exit_status(self, status):
    """Set the state to UP or DOWN from the exit status of a check.

    Exit status 0 gives UP (state id 0); any other value gives DOWN
    (state id 1). ``last_state``, ``last_state_update``, ``last_time_up``
    or ``last_time_down``, ``last_state_change`` and ``duration_sec`` are
    updated as well, and a flapping change is recorded when the code of
    the new state ('u' for UP, 'd' for DOWN) is in
    ``flap_detection_options``.
    """
    now = time.time()
    self.last_state_update = now

    # The last state must be the real one: when the current state was only
    # put there because we are an impact (and impact state changes are
    # enabled), the true old state is the one saved before the impact
    cls = self.__class__
    impact_state_in_place = (cls.enable_problem_impacts_states_change
                             and self.is_impact
                             and not self.state_changed_since_impact)
    self.last_state = self.state_before_impact if impact_state_in_place else self.state

    if status == 0:
        self.state, self.state_id, state_code = 'UP', 0, 'u'
        self.last_time_up = int(self.last_state_update)
    else:
        # 1, 2, 3 and any undetermined exit status all mean DOWN
        self.state, self.state_id, state_code = 'DOWN', 1, 'd'
        self.last_time_down = int(self.last_state_update)

    if state_code in self.flap_detection_options:
        self.add_flapping_change(self.state != self.last_state)
    if self.state != self.last_state:
        self.last_state_change = self.last_state_update
    self.duration_sec = now - self.last_state_change
837 # See if status is status. Can be low of high format (o/UP, d/DOWN, ...)
def is_state(self, status):
    """Tell if ``status`` designates our current state.

    ``status`` can be the full state name, or the short form: 'o' for UP,
    'd' for DOWN and 'u' for UNREACHABLE.
    """
    if status == self.state:
        return True
    # Now the short forms
    short_forms = (('o', 'UP'), ('d', 'DOWN'), ('u', 'UNREACHABLE'))
    return (status, self.state) in short_forms
851 # The last time when the state was not UP
def last_time_non_ok_or_up(self):
    """Return ``last_time_down`` when it is later than ``last_time_up``, else 0."""
    return self.last_time_down if self.last_time_down > self.last_time_up else 0
860 # Add a log entry with a HOST ALERT like:
861 # HOST ALERT: server;DOWN;HARD;1;I don't know what to say...
def raise_alert_log_entry(self):
    """Log a HOST ALERT line with our name, state, state type, attempt and output."""
    fields = (self.get_name(), self.state, self.state_type, self.attempt, self.output)
    logger.log('HOST ALERT: %s;%s;%s;%d;%s' % fields)
866 # Add a log entry with a Freshness alert like:
867 # Warning: The results of host 'Server' are stale by 0d 0h 0m 58s (threshold=0d 1h 0m 0s).
868 # I'm forcing an immediate check of the host.
def raise_freshness_log_entry(self, t_stale_by, t_threshold):
    """Log a warning saying the host results are stale and a check is forced.

    ``t_stale_by`` and ``t_threshold`` are both rendered with
    format_t_into_dhms_format.
    """
    stale_by = format_t_into_dhms_format(t_stale_by)
    threshold = format_t_into_dhms_format(t_threshold)
    message = "Warning: The results of host '%s' are stale by %s (threshold=%s). I'm forcing an immediate check of the host." % (self.get_name(), stale_by, threshold)
    logger.log(message)
874 # Raise a log entry with a Notification alert like
875 # HOST NOTIFICATION: superadmin;server;UP;notify-by-rss;no output
def raise_notification_log_entry(self, n):
    """Log a HOST NOTIFICATION line for the notification ``n``.

    For the downtime, custom, acknowledgement and flapping notification
    types, the state is shown as "<type> (<state>)". Nothing is logged
    unless the class flag ``log_notifications`` is set.
    """
    contact, command = n.contact, n.command_call
    decorated_types = ('DOWNTIMESTART', 'DOWNTIMEEND', 'CUSTOM', 'ACKNOWLEDGEMENT', 'FLAPPINGSTART', 'FLAPPINGSTOP', 'FLAPPINGDISABLED')
    if n.type in decorated_types:
        shown_state = '%s (%s)' % (n.type, self.state)
    else:
        shown_state = self.state
    if not self.__class__.log_notifications:
        return
    fields = (contact.get_name(), self.get_name(), shown_state,
              command.get_name(), self.output)
    logger.log("HOST NOTIFICATION: %s;%s;%s;%s;%s" % fields)
# Raise a log entry with an event handler alert like
# HOST EVENT HANDLER: <host>;<state>;<state type>;<attempt>;<command>
def raise_event_handler_log_entry(self, command):
    """Log a HOST EVENT HANDLER line for ``command``.

    Nothing is logged unless the class flag ``log_event_handlers`` is set.
    """
    if not self.__class__.log_event_handlers:
        return
    fields = (self.get_name(), self.state, self.state_type, self.attempt,
              command.get_name())
    logger.log("HOST EVENT HANDLER: %s;%s;%s;%s;%s" % fields)
895 #Raise a log entry with FLAPPING START alert like
896 #HOST FLAPPING ALERT: server;STARTED; Host appears to have started flapping (50.6% change >= 50.0% threshold)
def raise_flapping_start_log_entry(self, change_ratio, threshold):
    """Log that the host started flapping.

    The entry looks like:
    HOST FLAPPING ALERT: server;STARTED; Host appears to have started flapping (50.6% change >= 50.0% threshold)

    ``change_ratio`` and ``threshold`` are both printed with one decimal,
    followed by a percent sign.
    """
    # Literal percent signs must be doubled in a %-format string; the
    # previous '%.1f% change >= %.1% threshold' made the formatting fail
    logger.log("HOST FLAPPING ALERT: %s;STARTED; Host appears to have started flapping (%.1f%% change >= %.1f%% threshold)" % \
                   (self.get_name(), change_ratio, threshold))
902 #Raise a log entry with FLAPPING STOP alert like
903 #HOST FLAPPING ALERT: server;STOPPED; host appears to have stopped flapping (23.0% change < 25.0% threshold)
def raise_flapping_stop_log_entry(self, change_ratio, threshold):
    """Log that the host stopped flapping.

    The entry looks like:
    HOST FLAPPING ALERT: server;STOPPED; Host appears to have stopped flapping (23.0% change < 25.0% threshold)

    ``change_ratio`` and ``threshold`` are both printed with one decimal,
    followed by a percent sign.
    """
    # Literal percent signs must be doubled in a %-format string; the
    # previous '%.1f% change < %.1% threshold' made the formatting fail
    logger.log("HOST FLAPPING ALERT: %s;STOPPED; Host appears to have stopped flapping (%.1f%% change < %.1f%% threshold)" % \
                   (self.get_name(), change_ratio, threshold))
909 #If there is no valid time for next check, raise a log entry
def raise_no_next_check_log_entry(self):
    """Log a warning saying no future valid time was found for our check."""
    message = "Warning : I cannot schedule the check for the host '%s' because there is not future valid time" % self.get_name()
    logger.log(message)
914 #Raise a log entry when a downtime begins
915 #HOST DOWNTIME ALERT: test_host_0;STARTED; Host has entered a period of scheduled downtime
def raise_enter_downtime_log_entry(self):
    """Log a HOST DOWNTIME ALERT line with the STARTED status."""
    message = "HOST DOWNTIME ALERT: %s;STARTED; Host has entered a period of scheduled downtime" % self.get_name()
    logger.log(message)
921 #Raise a log entry when a downtime has finished
922 #HOST DOWNTIME ALERT: test_host_0;STOPPED; Host has exited from a period of scheduled downtime
def raise_exit_downtime_log_entry(self):
    """Log a HOST DOWNTIME ALERT line with the STOPPED status."""
    message = "HOST DOWNTIME ALERT: %s;STOPPED; Host has exited from a period of scheduled downtime" % self.get_name()
    logger.log(message)
#Raise a log entry when a downtime is cancelled
#HOST DOWNTIME ALERT: test_host_0;CANCELLED; Scheduled downtime for host has been cancelled.
def raise_cancel_downtime_log_entry(self):
    """Log a HOST DOWNTIME ALERT line with the CANCELLED status."""
    message = "HOST DOWNTIME ALERT: %s;CANCELLED; Scheduled downtime for host has been cancelled." % self.get_name()
    logger.log(message)
#Is stalking ?
#Launch if the check is in 'waitconsume' status (first time we see it)
#and if the option letter matching c.exit_status is in self.stalking_options
def manage_stalking(self, c):
    """Log a stalking entry for check c when the stalking conditions hold.

    An entry is logged only when c.status is 'waitconsume', the option
    letter mapped from c.exit_status is in self.stalking_options, and
    c.output equals self.output. Nothing is returned.
    """
    if c.status != 'waitconsume':
        return

    # exit status -> stalking option letter (1 and 2 both map to 'd')
    option_by_exit_status = {0: 'o', 1: 'd', 2: 'd', 3: 'u'}
    wanted_option = option_by_exit_status.get(c.exit_status)
    if wanted_option is None or wanted_option not in self.stalking_options:
        return

    # NOTE(review): the entry is suppressed when the outputs DIFFER, and the
    # logged text is self.output rather than c.output - confirm this is the
    # intended comparison direction.
    if c.output != self.output:
        return

    logger.log("Stalking %s : %s" % (self.get_name(), self.output))
#Fill act_depend_of with my parents (so network dependencies)
#and tell the parents that they impact me; no timeperiod, and follow the parents of course
def fill_parents_dependancie(self):
    """Add a 'network_dep' entry to act_depend_of for every non-None parent.

    Each parent is also asked to register this host through
    parent.register_child(self).
    """
    for host_parent in self.parents:
        if host_parent is None:
            continue
        # I add my parent in my list
        dependency = (host_parent, ['d', 'u', 's', 'f'], 'network_dep', None, True)
        self.act_depend_of.append(dependency)

        # And I register myself in my parent list too
        host_parent.register_child(self)
967 # Register a child in our lists
def register_child(self, child):
    """Record child in self.childs and in self.act_depend_of_me."""
    # Two lists are kept: 'childs' only holds the reference (used for
    # broking), 'act_depend_of_me' holds the full dependency data used by
    # the running part.
    self.childs.append(child)
    dependency = (child, ['d', 'u', 's', 'f'], 'network_dep', None, True)
    self.act_depend_of_me.append(dependency)
976 #Give data for checks's macros
def get_data_for_checks(self):
    """Return the data list for check macros: a new list holding only self."""
    macro_data = [self]
    return macro_data
980 #Give data for event handler's macro
def get_data_for_event_handler(self):
    """Return the data list for event handler macros: a new list holding only self."""
    macro_data = [self]
    return macro_data
#Give data for the notification's macros
def get_data_for_notifications(self, contact, n):
    """Return the data list for notification macros: [self, contact, n]."""
    macro_data = [self, contact, n]
    return macro_data
989 #See if the notification is launchable (time is OK and contact is OK too)
def notification_is_blocked_by_contact(self, n, contact):
    """Return True when the contact does not want notification n for this host.

    The decision is delegated to contact.want_host_notification(), called
    with self.last_chk, self.state, n.type and self.criticity.
    """
    contact_wants_it = contact.want_host_notification(
        self.last_chk, self.state, n.type, self.criticity)
    return not contact_wants_it
994 #MACRO PART
def get_duration_sec(self):
    """Return self.duration_sec truncated to an integer, as a string."""
    whole_seconds = int(self.duration_sec)
    return str(whole_seconds)
def get_duration(self):
    """Return self.duration_sec formatted as 'HHh MMm SSs'."""
    minutes, seconds = divmod(self.duration_sec, 60)
    hours, minutes = divmod(minutes, 60)
    parts = (hours, minutes, seconds)
    return "%02dh %02dm %02ds" % parts
1005 #Check if a notification for this host is suppressed at this time
1006 #This is a check at the host level. Do not look at contacts here
def notification_is_blocked_by_item(self, type, t_wished = None):
    """Tell whether a notification of this type is blocked at the host level.

    Contacts are not looked at here.

    type: notification type string; the values tested are 'PROBLEM',
          'RECOVERY', 'ACKNOWLEDGEMENT', 'FLAPPINGSTART', 'FLAPPINGSTOP',
          'FLAPPINGDISABLED', 'DOWNTIMESTART', 'DOWNTIMEEND' and
          'DOWNTIMECANCELLED'
    t_wished: time handed to notification_period.is_time_valid(); when it
              is None, time.time() is used

    Returns True when the notification must be blocked, False otherwise.
    """
    if t_wished is None:
        t_wished = time.time()

    state_change_types = ('PROBLEM', 'RECOVERY')
    flapping_types = ('FLAPPINGSTART', 'FLAPPINGSTOP', 'FLAPPINGDISABLED')
    downtime_end_types = ('DOWNTIMEEND', 'DOWNTIMECANCELLED')
    downtime_types = ('DOWNTIMESTART',) + downtime_end_types

    # TODO
    # forced notification -> false
    # custom notification -> false

    # Block if notifications are program-wide disabled
    if not self.enable_notifications:
        return True

    # Does the notification period allow sending out this notification?
    if not self.notification_period.is_time_valid(t_wished):
        return True

    # Block if notifications are disabled for this host
    if not self.notifications_enabled:
        return True

    # Block everything if the 'n' option is present in notification_options
    if 'n' in self.notification_options:
        return True

    # Block if the option letter of the current state (d, r, u) or of the
    # notification family (f, s) is missing from notification_options
    if type in state_change_types:
        if self.state == 'DOWN' and 'd' not in self.notification_options:
            return True
        if self.state == 'UP' and 'r' not in self.notification_options:
            return True
        if self.state == 'UNREACHABLE' and 'u' not in self.notification_options:
            return True
    if type in flapping_types and 'f' not in self.notification_options:
        return True
    if type in downtime_types and 's' not in self.notification_options:
        return True

    # Acknowledgements make no sense when the status is ok/up
    if type == 'ACKNOWLEDGEMENT':
        if self.state == self.ok_up:
            return True

    # Flapping
    if type in flapping_types:
        # todo block if not notify_on_flapping
        if self.scheduled_downtime_depth > 0:
            return True

    # When in deep downtime, only allow end-of-downtime notifications
    # In depth 1 the downtime just started and can be notified
    if self.scheduled_downtime_depth > 1 and type not in downtime_end_types:
        return True

    # Block if in a scheduled downtime and a problem arises
    if self.scheduled_downtime_depth > 0 and type in state_change_types:
        return True

    # Block if the status is SOFT
    if self.state_type == 'SOFT' and type == 'PROBLEM':
        return True

    # Block if the problem has already been acknowledged
    if self.problem_has_been_acknowledged and type != 'ACKNOWLEDGEMENT':
        return True

    # Block if flapping
    if self.is_flapping:
        return True

    return False
1080 #Get a oc*p command if item has obsess_over_*
1081 #command. It must be enabled locally and globally
def get_obsessive_compulsive_processor_command(self):
    """Queue the ochp command as an EventHandler in self.actions.

    Nothing is done unless both the class-level obsess_over flag and this
    host's obsess_over_host flag are set. Always returns None.
    """
    host_class = self.__class__
    # It must be enabled globally (class) and locally (host)
    if not (host_class.obsess_over and self.obsess_over_host):
        return

    resolver = MacroResolver()
    macro_data = self.get_data_for_event_handler()
    command_line = resolver.resolve_command(host_class.ochp_command, macro_data)
    handler = EventHandler(command_line, timeout=host_class.ochp_timeout)

    # ok we can put it in our temp action queue
    self.actions.append(handler)
class Hosts(Items):
    # Collection class for Host objects, built on Items
    name_property = "host_name" #use for the search by name
    inner_class = Host #use for know what is in items
1103 #prepare_for_conf_sending to flatten some properties
def prepare_for_sending(self):
    """Call prepare_for_conf_sending() on every host of the collection."""
    for host in self:
        host.prepare_for_conf_sending()
1109 #Create link between elements:
1110 #hosts -> timeperiods
1111 #hosts -> hosts (parents, etc)
1112 #hosts -> commands (check_command)
1113 #hosts -> contacts
def linkify(self, timeperiods=None, commands=None, contacts=None, realms=None, resultmodulations=None, escalations=None, hostgroups=None):
    """Run every linkify_* step of the collection, in a fixed order.

    Order: the three timeperiod properties, hosts by hosts, hosts by
    hostgroups, the two command properties, contacts, realms,
    resultmodulations and finally escalations.
    """
    for period_property in ('notification_period', 'check_period', 'maintenance_period'):
        self.linkify_with_timeperiods(timeperiods, period_property)

    self.linkify_h_by_h()
    self.linkify_h_by_hg(hostgroups)

    for command_property in ('check_command', 'event_handler'):
        self.linkify_one_command_with_commands(commands, command_property)

    self.linkify_with_contacts(contacts)
    self.linkify_h_by_realms(realms)
    self.linkify_with_resultmodulations(resultmodulations)
    # WARNING: not every escalation is linked here (only the plain
    # escalations, not serviceesca or hostesca). Those are linked in the
    # escalations linkify.
    self.linkify_with_escalations(escalations)
#Fill address with host_name if not set
def fill_predictive_missing_parameters(self):
    """Call fill_predictive_missing_parameters() on every host of the collection."""
    for host in self:
        host.fill_predictive_missing_parameters()
1138 #Link host with hosts (parents)
def linkify_h_by_h(self):
    """Replace the parent names of every host by the matching host objects.

    Each name is stripped and looked up with self.find_by_name(). Unknown
    names are dropped from the list and reported in
    self.configuration_errors.
    """
    for h in self:
        # The new member list
        new_parents = []
        for parent_name in h.parents:
            parent_name = parent_name.strip()
            p = self.find_by_name(parent_name)
            # Identity test: 'not found' is exactly None
            if p is not None:
                new_parents.append(p)
            else:
                err = "Error : the parent '%s' on host '%s' is unknown!" % (parent_name, h.get_name())
                self.configuration_errors.append(err)
        # We found the objects, we replace the names
        h.parents = new_parents
1157 #Link with realms and set a default realm if none
def linkify_h_by_realms(self, realms):
    """Replace each host's realm name by the realm object.

    Hosts whose realm is None receive the default realm (the last realm of
    'realms' with a true 'default' attribute) and get got_default_realm set
    to True. A realm name that realms.find_by_name() does not know is
    reported in the host's configuration_errors and the host's realm is set
    to None.
    """
    default_realm = None
    for r in realms:
        if hasattr(r, 'default') and r.default:
            default_realm = r
    if default_realm is None:
        # Single-argument print(...) gives the same output under the
        # Python 2 print statement and the Python 3 print function.
        print("Error : there is no default realm defined!")

    for h in self:
        if h.realm is None:
            # No realm asked: apply the default one (may itself be None)
            h.realm = default_realm
            h.got_default_realm = True
            continue

        p = realms.find_by_name(h.realm.strip())
        if p is not None:
            h.realm = p
            print("Host %s is in the realm %s" % (h.get_name(), p.get_name()))
        else:
            err = "Error : the host %s got a invalid realm (%s)!" % (h.get_name(), h.realm)
            h.configuration_errors.append(err)
            h.realm = None
1182 #We look for hostgroups property in hosts and
1183 #link them
def linkify_h_by_hg(self, hostgroups):
    """Replace the 'hostgroups' string of every non-template host by objects.

    The comma separated names are stripped and looked up with
    hostgroups.find_by_name(). Unknown names are reported in the host's
    configuration_errors. A host without the attribute, or with an empty
    string, ends up with an empty list. Templates are left untouched.
    """
    for h in self:
        if h.is_tpl():
            continue
        new_hostgroups = []
        if hasattr(h, 'hostgroups') and h.hostgroups != '':
            for hg_name in h.hostgroups.split(','):
                hg_name = hg_name.strip()
                hg = hostgroups.find_by_name(hg_name)
                # Identity test: 'not found' is exactly None
                if hg is not None:
                    new_hostgroups.append(hg)
                else:
                    err = "Error : the hostgroup '%s' of the host '%s' is unknown" % (hg_name, h.host_name)
                    h.configuration_errors.append(err)
        h.hostgroups = new_hostgroups
1207 #It's used to change old Nagios2 names to
1208 #Nagios3 ones
def old_properties_names_to_new(self):
    """Call old_properties_names_to_new() on every host of the collection."""
    for host in self:
        host.old_properties_names_to_new()
#Register the hosts in their hostgroups, then expand contact_groups into contacts
def explode(self, hostgroups, contactgroups):
    """Register hosts in their hostgroups, then expand contact groups.

    For every non-template host that has a host_name and a 'hostgroups'
    attribute, hostgroups.add_member(host_name, group_name) is called for
    each stripped, comma separated group name. Afterwards
    self.explode_contact_groups_into_contacts(contactgroups) is called.
    """
    # Register host in the hostgroups
    for host in self:
        if host.is_tpl() or not hasattr(host, 'host_name'):
            continue
        hname = host.host_name
        if not hasattr(host, 'hostgroups'):
            continue
        for group_name in host.hostgroups.split(','):
            hostgroups.add_member(hname, group_name.strip())

    # items::explode_contact_groups_into_contacts
    # take all contacts from our contact_groups into our contact property
    self.explode_contact_groups_into_contacts(contactgroups)
#Create dependencies:
#Dependencies at the host level: host parent
def apply_dependancies(self):
    """Call fill_parents_dependancie() on every host of the collection."""
    for host in self:
        host.fill_parents_dependancie()
#Parent graph: used to quickly find relations between all hosts, and loops
#return True if there is no loop, False if at least one host is in a loop
def no_loop_in_parents(self):
    """Check the parent relations of the hosts for loops.

    A Graph is built with every non-None host as a node and one edge
    (parent, host) per non-None parent. Every host returned by the graph's
    loop_check() is reported through logger.log.

    Returns True when loop_check() reports no host, False otherwise.
    """
    # Ok, we say "from now, no loop :) "
    r = True

    # Create parent graph
    parents = Graph()

    # With all hosts as nodes
    for h in self:
        if h is not None:
            parents.add_node(h)

    # And now fill edges
    for h in self:
        for p in h.parents:
            if p is not None:
                parents.add_edge(p, h)

    # Now get the list of all hosts in a loop
    host_in_loops = parents.loop_check()

    # and raise errors about it
    for h in host_in_loops:
        logger.log("Error: The host '%s' is part of a circular parent/child chain!" % h.get_name())
        r = False

    return r
#Return a list of the host_name of the hosts
#that use the template with name=tpl_name
def find_hosts_that_use_template(self, tpl_name):
    """Return the host_name of every host whose templates contain tpl_name.

    The template is the last item of the collection for which is_tpl() is
    true and whose 'name' attribute equals tpl_name. Hosts without a
    host_name attribute are skipped. An empty list is returned when no such
    template exists.
    """
    # First find the template
    tpl = None
    for h in self:
        # Look for a template with the good name
        if h.is_tpl() and hasattr(h, 'name') and h.name == tpl_name:
            tpl = h

    # If we found no one, we return nothing (easy case :) )
    if tpl is None:
        return []

    # Ok, we found the tpl
    return [h.host_name for h in self
            if tpl in h.templates and hasattr(h, 'host_name')]
1297 # Will create all business tree for the
1298 # services
def create_business_rules(self, hosts, services):
    """Call create_business_rules(hosts, services) on every host of the collection."""
    for host in self:
        host.create_business_rules(hosts, services)
1304 # Will link all business service/host with theirs
1305 # dep for problem/impact link
def create_business_rules_dependencies(self):
    """Call create_business_rules_dependencies() on every host of the collection."""
    for host in self:
        host.create_business_rules_dependencies()