Merge branch 'master' of ssh://naparuba@shinken.git.sourceforge.net/gitroot/shinken...
[shinken.git] / shinken / host.py
blob42ce7d17e2bc44f3b3b52b0bf6c10c5986e31316
1 #!/usr/bin/env python
2 #Copyright (C) 2009-2010 :
3 # Gabes Jean, naparuba@gmail.com
4 # Gerhard Lausser, Gerhard.Lausser@consol.de
5 # Gregory Starck, g.starck@gmail.com
7 #This file is part of Shinken.
9 #Shinken is free software: you can redistribute it and/or modify
10 #it under the terms of the GNU Affero General Public License as published by
11 #the Free Software Foundation, either version 3 of the License, or
12 #(at your option) any later version.
14 #Shinken is distributed in the hope that it will be useful,
15 #but WITHOUT ANY WARRANTY; without even the implied warranty of
16 #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 #GNU Affero General Public License for more details.
19 #You should have received a copy of the GNU Affero General Public License
20 #along with Shinken. If not, see <http://www.gnu.org/licenses/>.
22 """ This is the main class for the Host. In fact it's mainly
23 about the configuration part. For the running part, it's better
24 to look at the schedulingitem class, which manages all the
25 scheduling/check-consuming smart things :)
26 """
28 import time
29 import re #for keys generator
31 from shinken.autoslots import AutoSlots
32 from shinken.item import Items
33 from shinken.schedulingitem import SchedulingItem
34 from shinken.util import to_int, to_float, to_char, to_split, to_bool, format_t_into_dhms_format, to_hostnames_list, get_obj_name, to_svc_hst_distinct_lists, to_list_string_of_names, expand_with_macros
35 from shinken.property import UnusedProp, BoolProp, IntegerProp, FloatProp, CharProp, StringProp, ListProp
36 from shinken.graph import Graph
37 from shinken.macroresolver import MacroResolver
38 from shinken.eventhandler import EventHandler
39 from shinken.log import logger
41 class Host(SchedulingItem):
42 #AutoSlots create the __slots__ with properties and
43 #running_properties names
44 __metaclass__ = AutoSlots
46 id = 1 #0 is reserved for host (primary node for parents)
47 ok_up = 'UP'
48 my_type = 'host'
51 # properties defined by configuration
52 # *required : is required in conf
53 # *default : default value if not set in conf
54 # *pythonize : function to call when transforming the string into a python object
55 # *fill_brok : if set, send to broker. The categories used in this file are: full_status for initial and update status, check_result for check results, and next_schedule
56 # *no_slots : do not take this property for __slots__
57 # Only for the initial call
58 # conf_send_preparation : if set, will pass the property to this function. It's used to "flatten"
59 # some dangerous properties like realms that are too 'linked' to be sent like that.
60 # brok_transformation : if set, will call the function with the value of the property
61 # most of the time it will be to flatten the data (like realm_name instead of the realm object).
62 properties={
63 'host_name': StringProp(fill_brok=['full_status', 'check_result', 'next_schedule']),
64 'alias': StringProp(fill_brok=['full_status']),
65 'display_name': StringProp(
66 default='none',
67 fill_brok=['full_status']),
68 'address': StringProp(fill_brok=['full_status']),
69 'parents': ListProp(
70 brok_transformation=to_hostnames_list,
71 default='',
72 fill_brok=['full_status']),
73 'hostgroups': StringProp(
74 brok_transformation=to_list_string_of_names,
75 default='',
76 fill_brok=['full_status']),
77 'check_command': StringProp(
78 default='_internal_host_up',
79 fill_brok=['full_status']),
80 'initial_state': CharProp(
81 default='u',
82 fill_brok=['full_status']),
83 'max_check_attempts': IntegerProp(fill_brok=['full_status']),
84 'check_interval': IntegerProp(
85 default='0',
86 fill_brok=['full_status']),
87 'retry_interval': IntegerProp(
88 default='0',
89 fill_brok=['full_status']),
90 'active_checks_enabled': BoolProp(
91 default='1',
92 fill_brok=['full_status']),
93 'passive_checks_enabled': BoolProp(
94 default='1',
95 fill_brok=['full_status']),
96 'check_period': StringProp(fill_brok=['full_status']),
97 'obsess_over_host': BoolProp(
98 default='0',
99 fill_brok=['full_status']),
100 'check_freshness': BoolProp(
101 default='0',
102 fill_brok=['full_status']),
103 'freshness_threshold': IntegerProp(
104 default='0',
105 fill_brok=['full_status']),
106 'event_handler': StringProp(
107 default='',
108 fill_brok=['full_status']),
109 'event_handler_enabled': BoolProp(
110 default='0',
111 fill_brok=['full_status']),
112 'low_flap_threshold': IntegerProp(
113 default='25',
114 fill_brok=['full_status']),
115 'high_flap_threshold': IntegerProp(
116 default='50',
117 fill_brok=['full_status']),
118 'flap_detection_enabled': BoolProp(
119 default='1',
120 fill_brok=['full_status']),
121 'flap_detection_options': ListProp(
122 default='o,d,u',
123 fill_brok=['full_status']),
124 'process_perf_data': BoolProp(
125 default='1',
126 fill_brok=['full_status']),
127 'retain_status_information': BoolProp(
128 default='1',
129 fill_brok=['full_status']),
130 'retain_nonstatus_information': BoolProp(
131 default='1',
132 fill_brok=['full_status']),
133 'contacts': StringProp(
134 default='',
135 fill_brok=['full_status']),
136 'contact_groups': StringProp(
137 default='',
138 fill_brok=['full_status']),
139 'notification_interval': IntegerProp(
140 default='60',
141 fill_brok=['full_status']),
142 'first_notification_delay': IntegerProp(
143 default='0',
144 fill_brok=['full_status']),
145 'notification_period': StringProp(fill_brok=['full_status']),
146 'notification_options': ListProp(
147 default='d,u,r,f',
148 fill_brok=['full_status']),
149 'notifications_enabled': BoolProp(
150 default='1',
151 fill_brok=['full_status']),
152 'stalking_options': ListProp(
153 default='',
154 fill_brok=['full_status']),
155 'notes': StringProp(
156 default='',
157 fill_brok=['full_status']),
158 'notes_url': StringProp(
159 default='',
160 fill_brok=['full_status']),
161 'action_url': StringProp(
162 default='',
163 fill_brok=['full_status']),
164 'icon_image': StringProp(
165 default='',
166 fill_brok=['full_status']),
167 'icon_image_alt': StringProp(
168 default='',
169 fill_brok=['full_status']),
170 'vrml_image': StringProp(
171 default='',
172 fill_brok=['full_status']),
173 'statusmap_image': StringProp(
174 default='',
175 fill_brok=['full_status']),
177 # No slots for this 2 because begin property by a number seems bad
178 # it's stupid!
179 '2d_coords': StringProp(
180 default='',
181 fill_brok=['full_status'],
182 no_slots=True),
183 '3d_coords': StringProp(
184 default='',
185 fill_brok=['full_status'],
186 no_slots=True),
187 'failure_prediction_enabled': BoolProp(
188 default='0',
189 fill_brok=['full_status']),
191 ### New to shinken
192 # 'fill_brok' is ok because in scheduler it's already
193 # a string from conf_send_preparation
194 'realm': StringProp(
195 default=None,
196 fill_brok=['full_status'],
197 conf_send_preparation=get_obj_name),
198 'poller_tag': StringProp(default=None),
200 'resultmodulations': StringProp(default=''),
201 'escalations': StringProp(
202 default='',
203 fill_brok=['full_status']),
204 'maintenance_period': StringProp(
205 default='',
206 fill_brok=['full_status']),
208 # Criticity value
209 'criticity': IntegerProp(
210 default='3',
211 fill_brok=['full_status']),
215 # properties set only for running purpose
216 # retention : save/load this property from retention
217 running_properties = {
218 'last_chk': IntegerProp(
219 default=0,
220 fill_brok=['full_status', 'check_result'],
221 retention=True),
222 'next_chk': IntegerProp(
223 default=0,
224 fill_brok=['full_status', 'next_schedule']),
225 'in_checking': BoolProp(
226 default=False,
227 fill_brok=['full_status', 'check_result', 'next_schedule']),
228 'latency': FloatProp(
229 default=0,
230 fill_brok=['full_status', 'check_result'],
231 retention=True),
232 'attempt': IntegerProp(
233 default=0,
234 fill_brok=['full_status', 'check_result'],
235 retention=True),
236 'state': StringProp(
237 default='PENDING',
238 fill_brok=['full_status'],
239 retention=True),
240 'state_id': IntegerProp(
241 default=0,
242 fill_brok=['full_status', 'check_result'],
243 retention=True),
244 'state_type': StringProp(
245 default='HARD',
246 fill_brok=['full_status'],
247 retention=True),
248 'state_type_id': IntegerProp(
249 default=0,
250 fill_brok=['full_status', 'check_result'],
251 retention=True),
252 'current_event_id': StringProp(
253 default=0,
254 fill_brok=['full_status', 'check_result'],
255 retention=True),
256 'last_event_id': IntegerProp(
257 default=0,
258 fill_brok=['full_status', 'check_result'],
259 retention=True),
260 'last_state': StringProp(
261 default='PENDING',
262 fill_brok=['full_status'],
263 retention=True),
264 'last_state_id': IntegerProp(
265 default=0,
266 fill_brok=['full_status'],
267 retention=True),
268 'last_state_type' : StringProp(
269 default='HARD',
270 fill_brok=['full_status'],
271 retention=True),
272 'last_state_change': FloatProp(
273 default=time.time(),
274 fill_brok=['full_status'],
275 retention=True),
276 'last_hard_state_change': FloatProp(
277 default=time.time(),
278 fill_brok=['full_status', 'check_result'],
279 retention=True),
280 'last_hard_state': StringProp(
281 default='PENDING',
282 fill_brok=['full_status'],
283 retention=True),
284 'last_hard_state_id' : IntegerProp(
285 default=0,
286 fill_brok=['full_status'],
287 retention=True),
288 'last_time_up': IntegerProp(
289 default=int(time.time()),
290 fill_brok=['full_status', 'check_result'],
291 retention=True),
292 'last_time_down': IntegerProp(
293 default=int(time.time()),
294 fill_brok=['full_status', 'check_result'],
295 retention=True),
296 'last_time_unreachable': IntegerProp(
297 default=int(time.time()),
298 fill_brok=['full_status', 'check_result'],
299 retention=True),
300 'duration_sec': IntegerProp(
301 default=0,
302 fill_brok=['full_status'],
303 retention=True),
304 'output': StringProp(
305 default='',
306 fill_brok=['full_status', 'check_result'],
307 retention=True),
308 'long_output': StringProp(
309 default='',
310 fill_brok=['full_status', 'check_result'],
311 retention=True),
312 'is_flapping': BoolProp(
313 default=False,
314 fill_brok=['full_status'],
315 retention=True),
316 'flapping_comment_id': IntegerProp(
317 default=0,
318 fill_brok=['full_status'],
319 retention=True),
320 # No broks for _depend_of because of to much links to hosts/services
321 # dependencies for actions like notif of event handler, so AFTER check return
322 'act_depend_of': StringProp(default=[]),
324 # dependencies for checks raise, so BEFORE checks
325 'chk_depend_of': StringProp(default=[]),
327 # elements that depend of me, so the reverse than just uppper
328 'act_depend_of_me': StringProp(default=[]),
330 # elements that depend of me
331 'chk_depend_of_me': StringProp(default=[]),
333 'last_state_update': StringProp(
334 default=time.time(),
335 fill_brok=['full_status'],
336 retention=True),
338 # no brok ,to much links
339 'services': StringProp(default=[]),
341 # No broks, it's just internal, and checks have too links
342 'checks_in_progress': StringProp(default=[]),
344 # No broks, it's just internal, and checks have too links
345 'notifications_in_progress': StringProp(
346 default={},
347 retention=True),
348 'downtimes': StringProp(
349 default=[],
350 fill_brok=['full_status'],
351 retention=True),
352 'comments': StringProp(
353 default=[],
354 fill_brok=['full_status'],
355 retention=True),
356 'flapping_changes': StringProp(
357 default=[],
358 fill_brok=['full_status'],
359 retention=True),
360 'percent_state_change': FloatProp(
361 default=0.0,
362 fill_brok=['full_status'],
363 retention=True),
364 'problem_has_been_acknowledged': BoolProp(
365 default=False,
366 fill_brok=['full_status'],
367 retention=True),
368 'acknowledgement': StringProp(
369 default=None,
370 retention=True),
371 'acknowledgement_type': IntegerProp(
372 default=1,
373 fill_brok=['full_status', 'check_result'],
374 retention=True),
375 'check_type': IntegerProp(
376 default=0,
377 fill_brok=['full_status', 'check_result'],
378 retention=True),
379 'has_been_checked': IntegerProp(
380 default=0,
381 fill_brok=['full_status', 'check_result'],
382 retention=True),
383 'should_be_scheduled': IntegerProp(
384 default=1,
385 fill_brok=['full_status'],
386 retention=True),
387 'last_problem_id': IntegerProp(
388 default=0,
389 fill_brok=['full_status', 'check_result'],
390 retention=True),
391 'current_problem_id': IntegerProp(
392 default=0,
393 fill_brok=['full_status', 'check_result'],
394 retention=True),
395 'execution_time': FloatProp(
396 default=0.0,
397 fill_brok=['full_status', 'check_result'],
398 retention=True),
399 'last_notification': FloatProp(
400 default=time.time(),
401 fill_brok=['full_status'],
402 retention=True),
403 'current_notification_number': IntegerProp(
404 default=0,
405 fill_brok=['full_status'],
406 retention=True),
407 'current_notification_id': IntegerProp(
408 default=0,
409 fill_brok=['full_status'],
410 retention=True),
411 'check_flapping_recovery_notification': BoolProp(
412 default=True,
413 fill_brok=['full_status'],
414 retention=True),
415 'scheduled_downtime_depth': IntegerProp(
416 default=0,
417 fill_brok=['full_status'],
418 retention=True),
419 'pending_flex_downtime': IntegerProp(
420 default=0,
421 fill_brok=['full_status'],
422 retention=True),
423 'timeout': IntegerProp(
424 default=0,
425 fill_brok=['full_status', 'check_result'],
426 retention=True),
427 'start_time': IntegerProp(
428 default=0,
429 fill_brok=['full_status', 'check_result'],
430 retention=True),
431 'end_time': IntegerProp(
432 default=0,
433 fill_brok=['full_status', 'check_result'],
434 retention=True),
435 'early_timeout': IntegerProp(
436 default=0,
437 fill_brok=['full_status', 'check_result'],
438 retention=True),
439 'return_code': IntegerProp(
440 default=0,
441 fill_brok=['full_status', 'check_result'],
442 retention=True),
443 'perf_data': StringProp(
444 default='',
445 fill_brok=['full_status', 'check_result'],
446 retention=True),
447 'last_perf_data': StringProp(
448 default='',
449 retention=True),
450 'customs': StringProp(default={}, fill_brok=['full_status']),
452 'got_default_realm' : BoolProp(default=False),
454 # use for having all contacts we have notified
455 'notified_contacts': StringProp(
456 default=set()),
458 'in_scheduled_downtime': BoolProp(
459 default=False,
460 retention=True),
461 'in_scheduled_downtime_during_last_check': BoolProp(
462 default=False,
463 retention=True),
465 # put here checks and notif raised
466 'actions': StringProp(
467 default=[]),
468 # and here broks raised
469 'broks': StringProp(
470 default=[]),
472 # For knowing with which elements we are in relation
473 # of dep.
474 # childs are the hosts that have US as parent, so
475 # only a network dep
476 'childs': StringProp(
477 brok_transformation=to_hostnames_list,
478 default=[],
479 fill_brok=['full_status']),
480 # Here it's the elements we are depending on
481 # so our parents as network relation, or a host
482 # we are depending in a hostdependency
483 # or even if we are businesss based.
484 'parent_dependencies' : StringProp(
485 brok_transformation=to_svc_hst_distinct_lists,
486 default=[],
487 fill_brok=['full_status']),
488 # Here it's the guys taht depend on us. So it's the total
489 # oposite of the parent_dependencies
490 'child_dependencies': StringProp(
491 brok_transformation=to_svc_hst_distinct_lists,
492 default=[],
493 fill_brok=['full_status']),
495 # All errors and warning raised during the configuration parsing
496 # and taht will raised real warning/errors during the is_correct
497 'configuration_warnings': StringProp(default=[]),
498 'configuration_errors': StringProp(default=[]),
500 ### Problem/impact part
501 'is_problem': StringProp(
502 default=False,
503 fill_brok=['full_status']),
504 'is_impact': StringProp(
505 default=False,
506 fill_brok=['full_status']),
507 # the save value of our criticity for "problems"
508 'my_own_criticity': IntegerProp(default=-1),
510 # list of problems that make us an impact
511 'source_problems': StringProp(
512 brok_transformation=to_svc_hst_distinct_lists,
513 default=[],
514 fill_brok=['full_status']),
516 # list of the impact I'm the cause of
517 'impacts': StringProp(
518 brok_transformation=to_svc_hst_distinct_lists,
519 default=[],
520 fill_brok=['full_status']),
522 # keep a trace of the old state before being an impact
523 'state_before_impact': StringProp(default='PENDING'),
524 # keep a trace of the old state id before being an impact
525 'state_id_before_impact': StringProp(default=0),
526 # if the state change, we know so we do not revert it
527 'state_changed_since_impact': StringProp(default=False),
529 #BUSINESS CORRELATOR PART
530 # Say if we are business based rule or not
531 'got_business_rule' : BoolProp(default=False, fill_brok=['full_status']),
532 # Our Dependency node for the business rule
533 'business_rule' : StringProp(default=None),
535 # Manage the unkown/unreach during hard state
536 # From now its not really used
537 'in_hard_unknown_reach_phase' : BoolProp(default=False, retention=True),
538 'was_in_hard_unknown_reach_phase' : BoolProp(default=False, retention=True),
539 'state_before_hard_unknown_reach_phase' : StringProp(default='UP', retention=True),
542 # Hosts macros and prop that give the information
543 # the prop can be callable or not
544 macros = {'HOSTNAME' : 'host_name',
545 'HOSTDISPLAYNAME' : 'display_name',
546 'HOSTALIAS' : 'alias',
547 'HOSTADDRESS' : 'address',
548 'HOSTSTATE' : 'state',
549 'HOSTSTATEID' : 'state_id',
550 'LASTHOSTSTATE' : 'last_state',
551 'LASTHOSTSTATEID' : 'last_state_id',
552 'HOSTSTATETYPE' : 'state_type',
553 'HOSTATTEMPT' : 'attempt',
554 'MAXHOSTATTEMPTS' : 'max_check_attempts',
555 'HOSTEVENTID' : 'current_event_id',
556 'LASTHOSTEVENTID' : 'last_event_id',
557 'HOSTPROBLEMID' : 'current_problem_id',
558 'LASTHOSTPROBLEMID' : 'last_problem_id',
559 'HOSTLATENCY' : 'latency',
560 'HOSTEXECUTIONTIME' : 'execution_time',
561 'HOSTDURATION' : 'get_duration',
562 'HOSTDURATIONSEC' : 'get_duration_sec',
563 'HOSTDOWNTIME' : 'get_downtime',
564 'HOSTPERCENTCHANGE' : 'percent_state_change',
565 'HOSTGROUPNAME' : 'get_groupname',
566 'HOSTGROUPNAMES' : 'get_groupnames',
567 'LASTHOSTCHECK' : 'last_chk',
568 'LASTHOSTSTATECHANGE' : 'last_state_change',
569 'LASTHOSTUP' : 'last_time_up',
570 'LASTHOSTDOWN' : 'last_time_down',
571 'LASTHOSTUNREACHABLE' : 'last_time_unreachable',
572 'HOSTOUTPUT' : 'output',
573 'LONGHOSTOUTPUT' : 'long_output',
574 'HOSTPERFDATA' : 'perf_data',
575 'LASTHOSTPERFDATA' : 'last_perf_data',
576 'HOSTCHECKCOMMAND' : 'get_check_command',
577 'HOSTACKAUTHOR' : 'get_ack_author_name',
578 'HOSTACKAUTHORNAME' : 'get_ack_author_name',
579 'HOSTACKAUTHORALIAS' : 'get_ack_author_name',
580 'HOSTACKCOMMENT' : 'get_ack_comment',
581 'HOSTACTIONURL' : 'action_url',
582 'HOSTNOTESURL' : 'notes_url',
583 'HOSTNOTES' : 'notes',
584 'TOTALHOSTSERVICES' : 'get_total_services',
585 'TOTALHOSTSERVICESOK' : 'get_total_services_ok',
586 'TOTALHOSTSERVICESWARNING' : 'get_total_services_warning',
587 'TOTALHOSTSERVICESUNKNOWN' : 'get_total_services_unknown',
588 'TOTALHOSTSERVICESCRITICAL' : 'get_total_services_critical'
592 # This tab is used to transform old parameters name into new ones
593 # so from Nagios2 format, to Nagios3 ones
594 old_properties = {
595 'normal_check_interval' : 'check_interval',
596 'retry_check_interval' : 'retry_interval'
def clean(self):
    """Do nothing and return None; this implementation has no clean-up work."""
    return None
604 # Called by pickle to turn the host into plain data.
605 # We build a dict because lists are too dangerous for
606 # retention save and co :( even if it's more
607 # expensive.
608 # The __setstate__ function does the inverse
def __getstate__(self):
    """Build the dict that represents this host.

    The dict always holds 'id' (it is not listed in the property tables),
    plus every name of the class' ``properties`` and ``running_properties``
    that is actually set on the instance. ``__setstate__`` is the inverse.
    """
    klass = self.__class__
    state = {'id': self.id}
    # Same order as before: configuration properties first, then running ones
    for table in (klass.properties, klass.running_properties):
        for name in table:
            if hasattr(self, name):
                state[name] = getattr(self, name)
    return state
622 # Inversed funtion of getstate
def __setstate__(self, state):
    """Restore this host from a dict such as the one built by __getstate__.

    ``state['id']`` is mandatory. Any other key is applied only when it is a
    name of the class' ``properties`` or ``running_properties``; unknown keys
    are ignored.
    """
    klass = self.__class__
    self.id = state['id']
    for table in (klass.properties, klass.running_properties):
        for name in table:
            if name in state:
                setattr(self, name, state[name])
635 # Fill adresse with host_name if not already set
def fill_predictive_missing_parameters(self):
    """Default ``address`` and ``alias`` to ``host_name`` when they are unset.

    Nothing happens if the host has no ``host_name`` attribute.
    """
    if not hasattr(self, 'host_name'):
        return
    for attr in ('address', 'alias'):
        if not hasattr(self, attr):
            setattr(self, attr, self.host_name)
644 # Check is required prop are set:
645 # contacts OR contactgroups is need
def is_correct(self):
    """Validate this host's configuration and log every problem found.

    All the checks are run (there is no early exit), so one call reports
    every problem through the logger.

    Side effect: ``check_period`` is set to None when the attribute is
    missing.

    Returns:
        True when nothing wrong was found, False otherwise.
    """
    klass = self.__class__
    valid = True

    # These properties get a dedicated check further down, so the generic
    # "required property is missing" pass leaves them alone.
    checked_later = ('contacts', 'contact_groups', 'check_period',
                     'notification_interval')
    for prop in klass.properties:
        if prop in checked_later:
            continue
        if not hasattr(self, prop) and klass.properties[prop].required:
            logger.log("%s : I do not have %s" % (self.get_name(), prop))
            valid = False

    # Replay the errors that were stored in configuration_errors
    if self.configuration_errors != []:
        valid = False
        for error in self.configuration_errors:
            logger.log(error)

    # With notifications enabled, at least one of contacts / contact_groups
    # must be there
    if (not (hasattr(self, 'contacts') or hasattr(self, 'contact_groups'))
            and self.notifications_enabled == True):
        logger.log("%s : I do not have contacts nor contact_groups" % self.get_name())
        valid = False

    if getattr(self, 'check_command', None) is None:
        logger.log("%s : I've got no check_command" % self.get_name())
        valid = False
    else:
        # There is a command, but it can still be invalid
        if not self.check_command.is_valid():
            logger.log("%s : my check_command %s is invalid" % (self.get_name(), self.check_command.command))
            valid = False
        if self.got_business_rule and not self.business_rule.is_valid():
            logger.log("%s : my business rule is invalid" % (self.get_name(),))
            for bp_error in self.business_rule.configuration_errors:
                logger.log("%s : %s" % (self.get_name(), bp_error))
            valid = False

    if not hasattr(self, 'notification_interval') and self.notifications_enabled == True:
        logger.log("%s : I've got no notification_interval but I've got notifications enabled" % self.get_name())
        valid = False

    # Active checks with a check_interval != 0 need a check_period
    needs_check_period = (getattr(self, 'active_checks_enabled', False)
                          and getattr(self, 'check_period', None) is None
                          and getattr(self, 'check_interval', 1) != 0)
    if needs_check_period:
        logger.log("%s : My check_period is not correct" % self.get_name())
        valid = False

    if getattr(self, 'realm', None) is None:
        logger.log("%s : My realm is not correct" % self.get_name())
        valid = False

    if not hasattr(self, 'check_period'):
        self.check_period = None

    if hasattr(self, 'host_name'):
        for char in klass.illegal_object_name_chars:
            if char in self.host_name:
                logger.log("%s : My host_name got the caracter %s that is not allowed." % (self.get_name(), char))
                valid = False

    return valid
708 # Search in my service if I've got the service
def find_service_by_name(self, service_description):
    """Return the first service of this host with that description, or None."""
    matching = (svc for svc in self.services
                if svc.service_description == service_description)
    return next(matching, None)
716 # Macro part
def get_total_services(self):
    """Return the number of services in ``self.services``, as a string."""
    return '%d' % len(self.services)
def get_total_services_ok(self):
    """Return, as a string, how many services have ``state_id == 0``."""
    count = sum(1 for svc in self.services if svc.state_id == 0)
    return str(count)
def get_total_services_warning(self):
    """Return, as a string, how many services have ``state_id == 1``."""
    count = sum(1 for svc in self.services if svc.state_id == 1)
    return str(count)
def get_total_services_critical(self):
    """Return, as a string, how many services have ``state_id == 2``."""
    count = sum(1 for svc in self.services if svc.state_id == 2)
    return str(count)
def get_total_services_unknown(self):
    """Return, as a string, how many services have ``state_id == 3``."""
    count = sum(1 for svc in self.services if svc.state_id == 3)
    return str(count)
def get_ack_author_name(self):
    """Return the author of the current acknowledgement.

    Returns '' when ``self.acknowledgement`` is None.
    """
    # Identity test: '== None' would go through the acknowledgement object's
    # own __eq__, which can answer anything.
    if self.acknowledgement is None:
        return ''
    return self.acknowledgement.author
def get_ack_comment(self):
    """Return the comment of the current acknowledgement.

    Returns '' when ``self.acknowledgement`` is None.
    """
    # Identity test: '== None' would go through the acknowledgement object's
    # own __eq__, which can answer anything.
    if self.acknowledgement is None:
        return ''
    return self.acknowledgement.comment
def get_check_command(self):
    """Return what ``self.check_command.get_name()`` gives."""
    command = self.check_command
    return command.get_name()
753 # For get a nice name
def get_name(self):
    """Return ``name`` when ``is_tpl()`` is true, ``host_name`` otherwise."""
    if self.is_tpl():
        return self.name
    return self.host_name
761 # For debugin purpose only
def get_dbg_name(self):
    """Return ``host_name`` (debugging helper)."""
    dbg_name = self.host_name
    return dbg_name
766 # Say if we got the other in one of your dep list
def is_linked_with_host(self, other):
    """Tell whether ``other`` is the host of an entry of ``act_depend_of``.

    Each entry is a 5-item tuple whose first item is the host.
    """
    return any(dep_host == other
               for (dep_host, _status, _kind, _timeperiod, _inherits)
               in self.act_depend_of)
774 # Delete all links in the act_depend_of list of self and other
def del_host_act_dependancy(self, other):
    """Drop the action-dependency links between this host and ``other``.

    Entries about ``other`` are removed from ``self.act_depend_of`` and
    entries about ``self`` from ``other.act_depend_of_me``. Both lists are
    modified in place.
    """
    # Collect first and remove afterwards: a list must not shrink while it is
    # being iterated.
    doomed = [(h, status, kind, timeperiod, inherits_parent)
              for (h, status, kind, timeperiod, inherits_parent)
              in self.act_depend_of
              if h == other]
    for link in doomed:
        self.act_depend_of.remove(link)

    # Same thing on the other side of the relation
    doomed = [(h, status, kind, timeperiod, inherits_parent)
              for (h, status, kind, timeperiod, inherits_parent)
              in other.act_depend_of_me
              if h == self]
    for link in doomed:
        other.act_depend_of_me.remove(link)
793 # Add a dependancy for action event handler, notification, etc)
794 # and add ourself in it's dep list
def add_host_act_dependancy(self, h, status, timeperiod, inherits_parent):
    """Record that this host depends on ``h`` for its actions.

    A 'logic_dep' entry is appended to ``self.act_depend_of`` (pointing at
    ``h``) and to ``h.act_depend_of_me`` (pointing at ``self``); then
    ``h.register_son_in_parent_child_dependencies(self)`` is called.
    """
    kind = 'logic_dep'
    self.act_depend_of.append((h, status, kind, timeperiod, inherits_parent))
    h.act_depend_of_me.append((self, status, kind, timeperiod, inherits_parent))
    h.register_son_in_parent_child_dependencies(self)
805 # Register the dependancy between 2 service for action (notification etc)
806 # but based on a BUSINESS rule, so on fact:
807 # ERP depend on database, so we fill just database.act_depend_of_me
808 # because we will want ERP mails to go on! So call this
809 # on the database service with the srv=ERP service
def add_business_rule_act_dependancy(self, h, status, timeperiod, inherits_parent):
    """Register ``h`` as depending on this host through a business rule.

    Only ``self.act_depend_of_me`` receives a 'business_dep' entry; no
    ``act_depend_of`` list is touched. Then
    ``self.register_son_in_parent_child_dependencies(h)`` is called.
    """
    link = (h, status, 'business_dep', timeperiod, inherits_parent)
    self.act_depend_of_me.append(link)
    self.register_son_in_parent_child_dependencies(h)
822 # Add a dependancy for check (so before launch)
def add_host_chk_dependancy(self, h, status, timeperiod, inherits_parent):
    """Record that this host depends on ``h`` for its checks.

    A 'logic_dep' entry is appended to ``self.chk_depend_of`` (pointing at
    ``h``) and to ``h.chk_depend_of_me`` (pointing at ``self``); then
    ``h.register_son_in_parent_child_dependencies(self)`` is called.
    """
    kind = 'logic_dep'
    self.chk_depend_of.append((h, status, kind, timeperiod, inherits_parent))
    h.chk_depend_of_me.append((self, status, kind, timeperiod, inherits_parent))
    h.register_son_in_parent_child_dependencies(self)
834 # Add one of our service to services (at linkify)
def add_service_link(self, service):
    """Append ``service`` to ``self.services``."""
    services = self.services
    services.append(service)
839 # Set unreachable : all our parents are down!
840 # We have a special state, but state was already set, we just need to
841 # update it. We are not DOWN, we are UNREACHABLE and
842 # our state id is 2
def set_unreachable(self):
    """Switch the host to UNREACHABLE (state id 2).

    ``last_time_unreachable`` is set to the current time, truncated to an
    integer.
    """
    stamp = int(time.time())
    self.state_id = 2
    self.state = 'UNREACHABLE'
    self.last_time_unreachable = stamp
850 # We just go an impact, so we go unreachable
851 # But only if we enable this stte change in the conf
def set_impact_state(self):
    """Force the host to UNREACHABLE (state id 2) because it is an impact.

    Does nothing unless the class attribute
    ``enable_problem_impacts_states_change`` is true. The previous state and
    state id are saved in ``state_before_impact`` and
    ``state_id_before_impact``, and ``state_changed_since_impact`` is reset
    to False.
    """
    if not self.__class__.enable_problem_impacts_states_change:
        return
    # Remember where we come from, in case the problem goes away before a
    # new check comes in
    self.state_before_impact = self.state
    self.state_id_before_impact = self.state_id
    # Flag used to know whether the impact state got overridden since
    self.state_changed_since_impact = False
    self.state = 'UNREACHABLE'
    self.state_id = 2
865 # Ok, we are no more an impact, if no news checks
866 # overide the impact state, we came back to old
867 # states
868 # And only if impact state change is set in configuration
def unset_impact_state(self):
    """Give the host back the state it had before it became an impact.

    Does nothing when the class attribute
    ``enable_problem_impacts_states_change`` is false, or when
    ``state_changed_since_impact`` is true.
    """
    if not self.__class__.enable_problem_impacts_states_change:
        return
    if self.state_changed_since_impact:
        return
    self.state = self.state_before_impact
    self.state_id = self.state_id_before_impact
876 # Set the state to UP (exit status 0) or DOWN (any other exit status)
877 # from the status of a check. Also update last_state
def set_state_from_exit_status(self, status):
    """Update the host state from the exit status of a check.

    Status 0 gives UP (state id 0); any other status gives DOWN (state id 1).
    Also updated: ``last_state_update``, ``last_state``, ``last_time_up`` or
    ``last_time_down``, ``last_state_change`` (only when the state changed)
    and ``duration_sec``.
    """
    now = time.time()
    self.last_state_update = now

    # If the current state was only forced by an impact, the real previous
    # state is the one saved before the impact.
    klass = self.__class__
    forced_by_impact = (klass.enable_problem_impacts_states_change
                        and self.is_impact
                        and not self.state_changed_since_impact)
    self.last_state = self.state_before_impact if forced_by_impact else self.state

    update_stamp = int(self.last_state_update)
    if status == 0:
        self.state = 'UP'
        self.state_id = 0
        self.last_time_up = update_stamp
        # NOTE(review): 'u' is the code used for UP here, while is_state()
        # maps 'o' to UP and 'u' to UNREACHABLE -- confirm which code
        # flap_detection_options is expected to hold.
        flap_code = 'u'
    else:
        # 1, 2, 3 and any undetermined exit code are all handled as DOWN
        self.state = 'DOWN'
        self.state_id = 1
        self.last_time_down = update_stamp
        flap_code = 'd'

    if flap_code in self.flap_detection_options:
        self.add_flapping_change(self.state != self.last_state)
    if self.state != self.last_state:
        self.last_state_change = self.last_state_update
    self.duration_sec = now - self.last_state_change
916 # See if status matches our state. It can be in short or long format (o/UP, d/DOWN, u/UNREACHABLE)
def is_state(self, status):
    """Tell whether ``status`` designates the current state.

    ``status`` may be the state itself, or one of the one-letter codes
    'o' (UP), 'd' (DOWN) and 'u' (UNREACHABLE).
    """
    if status == self.state:
        return True
    short_forms = (('o', 'UP'), ('d', 'DOWN'), ('u', 'UNREACHABLE'))
    return any(status == code and self.state == full_name
               for code, full_name in short_forms)
930 # The last time when the state was not UP
def last_time_non_ok_or_up(self):
    """Return ``last_time_down`` if it is later than ``last_time_up``, else 0."""
    return self.last_time_down if self.last_time_down > self.last_time_up else 0
939 # Add a log entry with a HOST ALERT like:
940 # HOST ALERT: server;DOWN;HARD;1;I don't know what to say...
def raise_alert_log_entry(self):
    """Log a 'HOST ALERT' line: name;state;state_type;attempt;output."""
    fields = (self.get_name(), self.state, self.state_type, self.attempt, self.output)
    logger.log('HOST ALERT: %s;%s;%s;%d;%s' % fields)
945 # Add a log entry with a Freshness alert like:
946 # Warning: The results of host 'Server' are stale by 0d 0h 0m 58s (threshold=0d 1h 0m 0s).
947 # I'm forcing an immediate check of the host.
def raise_freshness_log_entry(self, t_stale_by, t_threshold):
    """Log a warning saying the host results are stale.

    ``t_stale_by`` and ``t_threshold`` are both rendered with
    format_t_into_dhms_format before being put in the message.
    """
    stale_by = format_t_into_dhms_format(t_stale_by)
    threshold = format_t_into_dhms_format(t_threshold)
    message = "Warning: The results of host '%s' are stale by %s (threshold=%s). I'm forcing an immediate check of the host."
    logger.log(message % (self.get_name(), stale_by, threshold))
953 # Raise a log entry with a Notification alert like
954 # HOST NOTIFICATION: superadmin;server;UP;notify-by-rss;no output
def raise_notification_log_entry(self, n):
    """Log a 'HOST NOTIFICATION' line for the notification ``n``.

    Line layout: contact name;host name;state;command name;output. For the
    notification types listed below, the state is written as
    'TYPE (state)'. Nothing is logged when the class attribute
    ``log_notifications`` is false.
    """
    contact = n.contact
    command = n.command_call
    typed_notifications = ('DOWNTIMESTART', 'DOWNTIMEEND', 'CUSTOM',
                           'ACKNOWLEDGEMENT', 'FLAPPINGSTART', 'FLAPPINGSTOP',
                           'FLAPPINGDISABLED')
    state = '%s (%s)' % (n.type, self.state) if n.type in typed_notifications else self.state
    if not self.__class__.log_notifications:
        return
    fields = (contact.get_name(), self.get_name(), state, command.get_name(), self.output)
    logger.log("HOST NOTIFICATION: %s;%s;%s;%s;%s" % fields)
966 # Raise a log entry with a Eventhandler alert like
967 # HOST EVENT HANDLER: server;UP;HARD;1;handler-command-name
def raise_event_handler_log_entry(self, command):
    """Log a 'HOST EVENT HANDLER' line for `command`.

    The line is name;state;state_type;attempt;command name. Nothing is
    logged when the class-level `log_event_handlers` flag is false.
    """
    if not self.__class__.log_event_handlers:
        return
    fields = (self.get_name(), self.state, self.state_type, self.attempt, command.get_name())
    logger.log("HOST EVENT HANDLER: %s;%s;%s;%s;%s" % fields)
974 #Raise a log entry with FLAPPING START alert like
975 #HOST FLAPPING ALERT: server;STARTED; Host appears to have started flapping (50.6% change >= 50.0% threshold)
def raise_flapping_start_log_entry(self, change_ratio, threshold):
    """Log a 'HOST FLAPPING ALERT ...;STARTED' line for this host.

    change_ratio -- number rendered as the "% change" figure
    threshold -- number rendered as the "% threshold" figure

    Both values are printed with one decimal.
    """
    # A literal percent sign must be written '%%' in a %-format string,
    # and the threshold needs a complete '%.1f' conversion. The former
    # '%.1f% change >= %.1% threshold' was parsed as a '% c' conversion
    # and raised TypeError instead of logging anything.
    message = "HOST FLAPPING ALERT: %s;STARTED; Host appears to have started flapping (%.1f%% change >= %.1f%% threshold)" % (self.get_name(), change_ratio, threshold)
    logger.log(message)
981 #Raise a log entry with FLAPPING STOP alert like
982 #HOST FLAPPING ALERT: server;STOPPED; host appears to have stopped flapping (23.0% change < 25.0% threshold)
def raise_flapping_stop_log_entry(self, change_ratio, threshold):
    """Log a 'HOST FLAPPING ALERT ...;STOPPED' line for this host.

    change_ratio -- number rendered as the "% change" figure
    threshold -- number rendered as the "% threshold" figure

    Both values are printed with one decimal.
    """
    # A literal percent sign must be written '%%' in a %-format string,
    # and the threshold needs a complete '%.1f' conversion. The former
    # '%.1f% change < %.1% threshold' was parsed as a '% c' conversion
    # and raised TypeError instead of logging anything.
    message = "HOST FLAPPING ALERT: %s;STOPPED; Host appears to have stopped flapping (%.1f%% change < %.1f%% threshold)" % (self.get_name(), change_ratio, threshold)
    logger.log(message)
988 #If there is no valid time for next check, raise a log entry
def raise_no_next_check_log_entry(self):
    """Log a warning saying no future valid time exists to schedule a check."""
    message = "Warning : I cannot schedule the check for the host '%s' because there is not future valid time" % self.get_name()
    logger.log(message)
993 #Raise a log entry when a downtime begins
994 #HOST DOWNTIME ALERT: test_host_0;STARTED; Host has entered a period of scheduled downtime
def raise_enter_downtime_log_entry(self):
    """Log a 'HOST DOWNTIME ALERT ...;STARTED' line for this host."""
    message = "HOST DOWNTIME ALERT: %s;STARTED; Host has entered a period of scheduled downtime" % self.get_name()
    logger.log(message)
1000 #Raise a log entry when a downtime has finished
1001 #HOST DOWNTIME ALERT: test_host_0;STOPPED; Host has exited from a period of scheduled downtime
def raise_exit_downtime_log_entry(self):
    """Log a 'HOST DOWNTIME ALERT ...;STOPPED' line for this host."""
    message = "HOST DOWNTIME ALERT: %s;STOPPED; Host has exited from a period of scheduled downtime" % self.get_name()
    logger.log(message)
1007 #Raise a log entry when a downtime prematurely ends
1008 #HOST DOWNTIME ALERT: test_host_0;CANCELLED; Scheduled downtime for host has been cancelled.
def raise_cancel_downtime_log_entry(self):
    """Log a 'HOST DOWNTIME ALERT ...;CANCELLED' line for this host."""
    message = "HOST DOWNTIME ALERT: %s;CANCELLED; Scheduled downtime for host has been cancelled." % self.get_name()
    logger.log(message)
1014 #Is stalking ?
1015 #Launch if check is waitconsume==first time
1016 #and if c.status is in self.stalking_options
def manage_stalking(self, c):
    """Log a 'Stalking' line for check `c` when stalking applies.

    Stalking is only considered for a check whose status is
    'waitconsume'. The exit status selects a letter (0 -> 'o',
    1 and 2 -> 'd', 3 -> 'u') that must be in `self.stalking_options`.
    """
    if c.status != 'waitconsume':
        return

    # Find the option letter tied to this exit status, if any.
    wanted_letter = None
    for exit_code, letter in ((0, 'o'), (1, 'd'), (2, 'd'), (3, 'u')):
        if c.exit_status == exit_code:
            wanted_letter = letter
            break
    if wanted_letter is None or wanted_letter not in self.stalking_options:
        return

    # NOTE(review): the entry is dropped when the check output differs
    # from the current one, so only unchanged outputs get logged. This
    # looks inverted for a stalking feature -- confirm before changing.
    if c.output != self.output:
        return

    logger.log("Stalking %s : %s" % (self.get_name(), self.output))
1034 #fill act_depend_of with my parents (so network dep)
1035 #and say parents they impact me, no timeperiod and folow parents of course
def fill_parents_dependancie(self):
    """Declare a 'network_dep' dependency on every parent of this host.

    For each parent that is not None: the dependency tuple is appended
    to `self.act_depend_of`, then the parent is told about this host
    through register_child() and
    register_son_in_parent_child_dependencies().
    """
    for parent in self.parents:
        if parent is None:
            continue
        # My side of the link.
        dependency = (parent, ['d', 'u', 's', 'f'], 'network_dep', None, True)
        self.act_depend_of.append(dependency)
        # The parent side of the link.
        parent.register_child(self)
        # And the parent/child dependency filling, for broking.
        parent.register_son_in_parent_child_dependencies(self)
1049 # Register a child in our lists
def register_child(self, child):
    """Record `child` as one of our children.

    Two lists are fed: `childs` only keeps the reference (used for
    broking), `act_depend_of_me` keeps the full 'network_dep'
    dependency tuple used by the running part.
    """
    dependency = (child, ['d', 'u', 's', 'f'], 'network_dep', None, True)
    self.childs.append(child)
    self.act_depend_of_me.append(dependency)
1058 #Give data for checks's macros
def get_data_for_checks(self):
    """Return the objects used to resolve check macros: only this host."""
    macro_sources = [self]
    return macro_sources
1062 #Give data for event handler's macro
def get_data_for_event_handler(self):
    """Return the objects used to resolve event handler macros: only this host."""
    macro_sources = [self]
    return macro_sources
1066 #Give data for notifications'n macros
def get_data_for_notifications(self, contact, n):
    """Return the objects used to resolve notification macros.

    The list is, in order: this host, the contact, the notification.
    """
    macro_sources = [self, contact, n]
    return macro_sources
1071 #See if the notification is launchable (time is OK and contact is OK too)
def notification_is_blocked_by_contact(self, n, contact):
    """Return True when `contact` does not want notification `n`.

    The decision is delegated to contact.want_host_notification(),
    called with our last check time, state, the notification type and
    our criticity; its answer is negated.
    """
    wanted = contact.want_host_notification(self.last_chk, self.state, n.type, self.criticity)
    return not wanted
1076 #MACRO PART
def get_duration_sec(self):
    """Return `duration_sec` truncated to an integer, as a string."""
    whole_seconds = int(self.duration_sec)
    return str(whole_seconds)
def get_duration(self):
    """Return `duration_sec` formatted as 'HHh MMm SSs'.

    Hours are not folded into days, so they can exceed 23.
    """
    minutes, seconds = divmod(self.duration_sec, 60)
    hours, minutes = divmod(minutes, 60)
    return "%02dh %02dm %02ds" % (hours, minutes, seconds)
1087 #Check if a notification for this host is suppressed at this time
1088 #This is a check at the host level. Do not look at contacts here
def notification_is_blocked_by_item(self, type, t_wished = None):
    """Return True when a notification of kind `type` must not be sent.

    This is the host-level decision only; contacts are not looked at.

    type -- notification type ('PROBLEM', 'RECOVERY', 'ACKNOWLEDGEMENT',
            'FLAPPING*', 'DOWNTIME*', ...)
    t_wished -- time checked against the notification period;
                defaults to the current time
    """
    if t_wished is None:
        t_wished = time.time()

    # TODO: forced and custom notifications should never be blocked.

    is_state_change = type in ('PROBLEM', 'RECOVERY')
    is_flapping_event = type in ('FLAPPINGSTART', 'FLAPPINGSTOP', 'FLAPPINGDISABLED')
    is_downtime_event = type in ('DOWNTIMESTART', 'DOWNTIMEEND', 'DOWNTIMECANCELLED')

    # Notifications disabled program-wide.
    if not self.enable_notifications:
        return True

    # Outside of the notification period.
    if not self.notification_period.is_time_valid(t_wished):
        return True

    # Notifications disabled for this host.
    if not self.notifications_enabled:
        return True

    options = self.notification_options

    # 'n' means "never notify".
    if 'n' in options:
        return True

    # A state change needs the letter matching the current state.
    if is_state_change:
        needed = {'DOWN': 'd', 'UP': 'r', 'UNREACHABLE': 'u'}.get(self.state)
        if needed is not None and needed not in options:
            return True
    if is_flapping_event and 'f' not in options:
        return True
    if is_downtime_event and 's' not in options:
        return True

    # Acknowledgements make no sense when the status is ok/up.
    if type == 'ACKNOWLEDGEMENT' and self.state == self.ok_up:
        return True

    depth = self.scheduled_downtime_depth

    # Flapping events are muted during any downtime.
    # TODO: also block if not notify_on_flapping.
    if is_flapping_event and depth > 0:
        return True

    # In nested downtime only end-of-downtime notifications pass; at
    # depth 1 the downtime just started and may still be notified.
    if depth > 1 and type not in ('DOWNTIMEEND', 'DOWNTIMECANCELLED'):
        return True

    # State changes are muted during a scheduled downtime.
    if depth > 0 and is_state_change:
        return True

    # Problems are only notified once the state is HARD.
    if type == 'PROBLEM' and self.state_type == 'SOFT':
        return True

    # An acknowledged problem only lets the acknowledgement through.
    if self.problem_has_been_acknowledged and type != 'ACKNOWLEDGEMENT':
        return True

    # Everything is blocked while the host is flapping.
    if self.is_flapping:
        return True

    return False
1162 #Get a oc*p command if item has obsess_over_*
1163 #command. It must be enabled locally and globally
def get_obsessive_compulsive_processor_command(self):
    """Queue the obsessive compulsive (ochp) command for this host.

    Nothing happens unless obsessing is enabled both on the class
    (`obsess_over`) and on this host (`obsess_over_host`). Otherwise
    the class `ochp_command` is resolved with the event handler macro
    data and an EventHandler built from it is appended to
    `self.actions`. Always returns None.
    """
    cls = self.__class__
    if not (cls.obsess_over and self.obsess_over_host):
        return

    resolver = MacroResolver()
    macro_data = self.get_data_for_event_handler()
    resolved_cmd = resolver.resolve_command(cls.ochp_command, macro_data)
    handler = EventHandler(resolved_cmd, timeout=cls.ochp_timeout)

    # Park it in our temporary action queue.
    self.actions.append(handler)
1180 class Hosts(Items):
1181 name_property = "host_name" #use for the search by name
1182 inner_class = Host #use for know what is in items
1185 #prepare_for_conf_sending to flatten some properties
def prepare_for_sending(self):
    """Call prepare_for_conf_sending() on every host of the collection."""
    for host in self:
        host.prepare_for_conf_sending()
1191 #Create link between elements:
1192 #hosts -> timeperiods
1193 #hosts -> hosts (parents, etc)
1194 #hosts -> commands (check_command)
1195 #hosts -> contacts
def linkify(self, timeperiods=None, commands=None, contacts=None, realms=None, resultmodulations=None, escalations=None, hostgroups=None):
    """Run every linking step of the hosts, in a fixed order.

    Steps: timeperiod properties, parents, hostgroups, command
    properties, contacts, realms, result modulations, escalations.
    """
    for period_prop in ('notification_period', 'check_period', 'maintenance_period'):
        self.linkify_with_timeperiods(timeperiods, period_prop)

    self.linkify_h_by_h()
    self.linkify_h_by_hg(hostgroups)

    for command_prop in ('check_command', 'event_handler'):
        self.linkify_one_command_with_commands(commands, command_prop)

    self.linkify_with_contacts(contacts)
    self.linkify_h_by_realms(realms)
    self.linkify_with_resultmodulations(resultmodulations)

    # WARNING: only the plain escalations are linked here. Service and
    # host escalations are linked by the escalations' own linkify.
    self.linkify_with_escalations(escalations)
1214 #Fill adress by host_name if not set
def fill_predictive_missing_parameters(self):
    """Call fill_predictive_missing_parameters() on every host."""
    for host in self:
        host.fill_predictive_missing_parameters()
1220 #Link host with hosts (parents)
def linkify_h_by_h(self):
    """Replace the parent names of every host by the parent objects.

    Each name is stripped and searched with find_by_name(). An unknown
    name is dropped from the list and reported in
    `self.configuration_errors`.
    """
    for h in self:
        linked_parents = []
        for raw_name in h.parents:
            parent_name = raw_name.strip()
            found = self.find_by_name(parent_name)
            if found is None:
                err = "Error : the parent '%s' on host '%s' is unknown!" % (parent_name, h.get_name())
                # NOTE(review): stored on the collection, while the other
                # linkify_* methods store on the host -- confirm intended.
                self.configuration_errors.append(err)
                continue
            linked_parents.append(found)
        # The names are now replaced by the objects.
        h.parents = linked_parents
1239 #Link with realms and set a default realm if none
def linkify_h_by_realms(self, realms):
    """Replace the realm name of every host by the realm object.

    A host without realm gets the default realm (the last realm of
    `realms` with a true `default` attribute, possibly None) and is
    flagged with `got_default_realm`. A host with an unknown realm
    name gets an entry in its `configuration_errors` and realm None.
    """
    default_realm = None
    for realm in realms:
        if getattr(realm, 'default', False):
            default_realm = realm
    if default_realm is None:
        print("Error : there is no default realm defined!")

    for h in self:
        if h.realm is None:
            # Nothing configured: fall back on the default realm.
            h.realm = default_realm
            h.got_default_realm = True
            continue

        found = realms.find_by_name(h.realm.strip())
        if found is not None:
            h.realm = found
            print("Host %s is in the realm %s" % (h.get_name(), found.get_name()))
        else:
            err = "Error : the host %s got a invalid realm (%s)!" % (h.get_name(), h.realm)
            h.configuration_errors.append(err)
            h.realm = None
1264 #We look for hostgroups property in hosts and
1265 #link them
def linkify_h_by_hg(self, hostgroups):
    """Replace the hostgroup names of every real host by the objects.

    Templates are skipped. The `hostgroups` string of a host is split
    on commas; each stripped name is searched in `hostgroups`. An
    unknown name is reported in the host `configuration_errors`. The
    host `hostgroups` attribute ends up as the list of found objects
    (empty when the host had no hostgroups or an empty string).
    """
    for h in self:
        if h.is_tpl():
            continue
        linked = []
        if hasattr(h, 'hostgroups') and h.hostgroups != '':
            for raw_name in h.hostgroups.split(','):
                hg_name = raw_name.strip()
                hg = hostgroups.find_by_name(hg_name)
                if hg is None:
                    err = "Error : the hostgroup '%s' of the host '%s' is unknown" % (hg_name, h.host_name)
                    h.configuration_errors.append(err)
                else:
                    linked.append(hg)
        h.hostgroups = linked
1289 #It's used to change old Nagios2 names to
1290 #Nagios3 ones
def old_properties_names_to_new(self):
    """Call old_properties_names_to_new() on every host."""
    for host in self:
        host.old_properties_names_to_new()
1297 #We look for hostgroups property in hosts and
def explode(self, hostgroups, contactgroups):
    """Register hosts in their hostgroups, then explode contact groups.

    Every non-template host with a `host_name` and a `hostgroups`
    string is added, by name, to each comma separated (stripped)
    hostgroup through hostgroups.add_member(). Then
    explode_contact_groups_into_contacts() is run with `contactgroups`.
    """
    for h in self:
        if h.is_tpl() or not hasattr(h, 'host_name'):
            continue
        hname = h.host_name
        if not hasattr(h, 'hostgroups'):
            continue
        for hg_name in h.hostgroups.split(','):
            hostgroups.add_member(hname, hg_name.strip())

    # Take all contacts of our contact_groups into our contacts.
    self.explode_contact_groups_into_contacts(contactgroups)
1318 #Create depenancies:
1319 #Depencies at the host level: host parent
def apply_dependancies(self):
    """Call fill_parents_dependancie() on every host."""
    for host in self:
        host.fill_parents_dependancie()
1325 #Parent graph: use to find quickly relations between all host, and loop
1326 #return True if there is no loop, False if at least one host is in a loop
def no_loop_in_parents(self):
    """Check the parent relations of the hosts for loops.

    A Graph is built with the hosts as nodes and one edge per
    (parent, host) pair. Every host reported by loop_check() gets an
    error log line. Returns True when no host is reported, else False.
    """
    parent_graph = Graph()

    # Nodes: every host.
    for host in self:
        if host is not None:
            parent_graph.add_node(host)

    # Edges: from each parent to its child.
    for host in self:
        for parent in host.parents:
            if parent is not None:
                parent_graph.add_edge(parent, host)

    # Until a looping host is reported, everything is fine.
    loop_free = True
    for host in parent_graph.loop_check():
        logger.log("Error: The host '%s' is part of a circular parent/child chain!" % host.get_name())
        loop_free = False

    return loop_free
1356 #Return a list of the host_name of the hosts
1357 #that gotthe template with name=tpl_name
def find_hosts_that_use_template(self, tpl_name):
    """Return the host_name of every host using the template `tpl_name`.

    The template is the last item that is a template and whose `name`
    equals `tpl_name`; an empty list is returned when there is none.
    Items without `host_name` are left out of the result.
    """
    # First find the template itself.
    tpl = None
    for candidate in self:
        if candidate.is_tpl() and hasattr(candidate, 'name') and candidate.name == tpl_name:
            tpl = candidate

    # No such template: nobody can use it.
    if tpl is None:
        return []

    return [h.host_name for h in self
            if tpl in h.templates and hasattr(h, 'host_name')]
1379 # Will create all business tree for the
1380 # services
def create_business_rules(self, hosts, services):
    """Call create_business_rules(hosts, services) on every host."""
    for host in self:
        host.create_business_rules(hosts, services)
1386 # Will link all business service/host with theirs
1387 # dep for problem/impact link
def create_business_rules_dependencies(self):
    """Call create_business_rules_dependencies() on every host."""
    for host in self:
        host.create_business_rules_dependencies()