2 #Copyright (C) 2009-2010 :
3 # Gabes Jean, naparuba@gmail.com
4 # Gerhard Lausser, Gerhard.Lausser@consol.de
5 # Gregory Starck, g.starck@gmail.com
7 #This file is part of Shinken.
9 #Shinken is free software: you can redistribute it and/or modify
10 #it under the terms of the GNU Affero General Public License as published by
11 #the Free Software Foundation, either version 3 of the License, or
12 #(at your option) any later version.
14 #Shinken is distributed in the hope that it will be useful,
15 #but WITHOUT ANY WARRANTY; without even the implied warranty of
16 #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 #GNU Affero General Public License for more details.
19 #You should have received a copy of the GNU Affero General Public License
20 #along with Shinken. If not, see <http://www.gnu.org/licenses/>.
22 """ This is the main class for the Host. In fact it's mainly
23 about the configuration part. for the running one, it's better
24 to look at the schedulingitem class that manage all
25 scheduling/consome check smart things :)
29 import re
#for keys generator
31 from shinken
.autoslots
import AutoSlots
32 from shinken
.item
import Items
33 from shinken
.schedulingitem
import SchedulingItem
34 from shinken
.util
import to_int
, to_float
, to_char
, to_split
, to_bool
, format_t_into_dhms_format
, to_hostnames_list
, get_obj_name
, to_svc_hst_distinct_lists
, to_list_string_of_names
, expand_with_macros
35 from shinken
.property import UnusedProp
, BoolProp
, IntegerProp
, FloatProp
, CharProp
, StringProp
, ListProp
36 from shinken
.graph
import Graph
37 from shinken
.macroresolver
import MacroResolver
38 from shinken
.eventhandler
import EventHandler
39 from shinken
.log
import logger
41 class Host(SchedulingItem
):
42 #AutoSlots create the __slots__ with properties and
43 #running_properties names
44 __metaclass__
= AutoSlots
46 id = 1 #0 is reserved for host (primary node for parents)
51 # properties defined by configuration
52 # *required : is required in conf
53 # *default : default value if no set in conf
54 # *pythonize : function to call when transforming the string into a Python object
55 # *fill_brok : if set, send to broker. there are two categories: full_status for initial and update status, check_result for check results
56 # *no_slots : do not take this property for __slots__
57 # Only for the initial call
58 # conf_send_preparation : if set, will pass the property to this function. It's used to "flatten"
59 # some dangerous properties like realms that are too 'linked' to be send like that.
60 # brok_transformation : if set, will call the function with the value of the property
61 # the major times it will be to flatten the data (like realm_name instead of the realm object).
63 'host_name': StringProp(fill_brok
=['full_status', 'check_result', 'next_schedule']),
64 'alias': StringProp(fill_brok
=['full_status']),
65 'display_name': StringProp(
67 fill_brok
=['full_status']),
68 'address': StringProp(fill_brok
=['full_status']),
70 brok_transformation
=to_hostnames_list
,
72 fill_brok
=['full_status']),
73 'hostgroups': StringProp(
74 brok_transformation
=to_list_string_of_names
,
76 fill_brok
=['full_status']),
77 'check_command': StringProp(
78 default
='_internal_host_up',
79 fill_brok
=['full_status']),
80 'initial_state': CharProp(
82 fill_brok
=['full_status']),
83 'max_check_attempts': IntegerProp(fill_brok
=['full_status']),
84 'check_interval': IntegerProp(
86 fill_brok
=['full_status']),
87 'retry_interval': IntegerProp(
89 fill_brok
=['full_status']),
90 'active_checks_enabled': BoolProp(
92 fill_brok
=['full_status']),
93 'passive_checks_enabled': BoolProp(
95 fill_brok
=['full_status']),
96 'check_period': StringProp(fill_brok
=['full_status']),
97 'obsess_over_host': BoolProp(
99 fill_brok
=['full_status']),
100 'check_freshness': BoolProp(
102 fill_brok
=['full_status']),
103 'freshness_threshold': IntegerProp(
105 fill_brok
=['full_status']),
106 'event_handler': StringProp(
108 fill_brok
=['full_status']),
109 'event_handler_enabled': BoolProp(
111 fill_brok
=['full_status']),
112 'low_flap_threshold': IntegerProp(
114 fill_brok
=['full_status']),
115 'high_flap_threshold': IntegerProp(
117 fill_brok
=['full_status']),
118 'flap_detection_enabled': BoolProp(
120 fill_brok
=['full_status']),
121 'flap_detection_options': ListProp(
123 fill_brok
=['full_status']),
124 'process_perf_data': BoolProp(
126 fill_brok
=['full_status']),
127 'retain_status_information': BoolProp(
129 fill_brok
=['full_status']),
130 'retain_nonstatus_information': BoolProp(
132 fill_brok
=['full_status']),
133 'contacts': StringProp(
135 fill_brok
=['full_status']),
136 'contact_groups': StringProp(
138 fill_brok
=['full_status']),
139 'notification_interval': IntegerProp(
141 fill_brok
=['full_status']),
142 'first_notification_delay': IntegerProp(
144 fill_brok
=['full_status']),
145 'notification_period': StringProp(fill_brok
=['full_status']),
146 'notification_options': ListProp(
148 fill_brok
=['full_status']),
149 'notifications_enabled': BoolProp(
151 fill_brok
=['full_status']),
152 'stalking_options': ListProp(
154 fill_brok
=['full_status']),
157 fill_brok
=['full_status']),
158 'notes_url': StringProp(
160 fill_brok
=['full_status']),
161 'action_url': StringProp(
163 fill_brok
=['full_status']),
164 'icon_image': StringProp(
166 fill_brok
=['full_status']),
167 'icon_image_alt': StringProp(
169 fill_brok
=['full_status']),
170 'vrml_image': StringProp(
172 fill_brok
=['full_status']),
173 'statusmap_image': StringProp(
175 fill_brok
=['full_status']),
177 # No slots for this 2 because begin property by a number seems bad
179 '2d_coords': StringProp(
181 fill_brok
=['full_status'],
183 '3d_coords': StringProp(
185 fill_brok
=['full_status'],
187 'failure_prediction_enabled': BoolProp(
189 fill_brok
=['full_status']),
192 # 'fill_brok' is ok because in scheduler it's already
193 # a string from conf_send_preparation
196 fill_brok
=['full_status'],
197 conf_send_preparation
=get_obj_name
),
198 'poller_tag': StringProp(default
=None),
200 'resultmodulations': StringProp(default
=''),
201 'escalations': StringProp(
203 fill_brok
=['full_status']),
204 'maintenance_period': StringProp(
206 fill_brok
=['full_status']),
209 'criticity': IntegerProp(
211 fill_brok
=['full_status']),
215 # properties set only for running purpose
216 # retention : save/load this property from retention
217 running_properties
= {
218 'last_chk': IntegerProp(
220 fill_brok
=['full_status', 'check_result'],
222 'next_chk': IntegerProp(
224 fill_brok
=['full_status', 'next_schedule']),
225 'in_checking': BoolProp(
227 fill_brok
=['full_status', 'check_result', 'next_schedule']),
228 'latency': FloatProp(
230 fill_brok
=['full_status', 'check_result'],
232 'attempt': IntegerProp(
234 fill_brok
=['full_status', 'check_result'],
238 fill_brok
=['full_status'],
240 'state_id': IntegerProp(
242 fill_brok
=['full_status', 'check_result'],
244 'state_type': StringProp(
246 fill_brok
=['full_status'],
248 'state_type_id': IntegerProp(
250 fill_brok
=['full_status', 'check_result'],
252 'current_event_id': StringProp(
254 fill_brok
=['full_status', 'check_result'],
256 'last_event_id': IntegerProp(
258 fill_brok
=['full_status', 'check_result'],
260 'last_state': StringProp(
262 fill_brok
=['full_status'],
264 'last_state_id': IntegerProp(
266 fill_brok
=['full_status'],
268 'last_state_type' : StringProp(
270 fill_brok
=['full_status'],
272 'last_state_change': FloatProp(
274 fill_brok
=['full_status'],
276 'last_hard_state_change': FloatProp(
278 fill_brok
=['full_status', 'check_result'],
280 'last_hard_state': StringProp(
282 fill_brok
=['full_status'],
284 'last_hard_state_id' : IntegerProp(
286 fill_brok
=['full_status'],
288 'last_time_up': IntegerProp(
289 default
=int(time
.time()),
290 fill_brok
=['full_status', 'check_result'],
292 'last_time_down': IntegerProp(
293 default
=int(time
.time()),
294 fill_brok
=['full_status', 'check_result'],
296 'last_time_unreachable': IntegerProp(
297 default
=int(time
.time()),
298 fill_brok
=['full_status', 'check_result'],
300 'duration_sec': IntegerProp(
302 fill_brok
=['full_status'],
304 'output': StringProp(
306 fill_brok
=['full_status', 'check_result'],
308 'long_output': StringProp(
310 fill_brok
=['full_status', 'check_result'],
312 'is_flapping': BoolProp(
314 fill_brok
=['full_status'],
316 'flapping_comment_id': IntegerProp(
318 fill_brok
=['full_status'],
320 # No broks for _depend_of because of to much links to hosts/services
321 # dependencies for actions like notif of event handler, so AFTER check return
322 'act_depend_of': StringProp(default
=[]),
324 # dependencies for checks raise, so BEFORE checks
325 'chk_depend_of': StringProp(default
=[]),
327 # elements that depend of me, so the reverse than just uppper
328 'act_depend_of_me': StringProp(default
=[]),
330 # elements that depend of me
331 'chk_depend_of_me': StringProp(default
=[]),
333 'last_state_update': StringProp(
335 fill_brok
=['full_status'],
338 # no brok ,to much links
339 'services': StringProp(default
=[]),
341 # No broks, it's just internal, and checks have too links
342 'checks_in_progress': StringProp(default
=[]),
344 # No broks, it's just internal, and checks have too links
345 'notifications_in_progress': StringProp(
348 'downtimes': StringProp(
350 fill_brok
=['full_status'],
352 'comments': StringProp(
354 fill_brok
=['full_status'],
356 'flapping_changes': StringProp(
358 fill_brok
=['full_status'],
360 'percent_state_change': FloatProp(
362 fill_brok
=['full_status'],
364 'problem_has_been_acknowledged': BoolProp(
366 fill_brok
=['full_status'],
368 'acknowledgement': StringProp(
371 'acknowledgement_type': IntegerProp(
373 fill_brok
=['full_status', 'check_result'],
375 'check_type': IntegerProp(
377 fill_brok
=['full_status', 'check_result'],
379 'has_been_checked': IntegerProp(
381 fill_brok
=['full_status', 'check_result'],
383 'should_be_scheduled': IntegerProp(
385 fill_brok
=['full_status'],
387 'last_problem_id': IntegerProp(
389 fill_brok
=['full_status', 'check_result'],
391 'current_problem_id': IntegerProp(
393 fill_brok
=['full_status', 'check_result'],
395 'execution_time': FloatProp(
397 fill_brok
=['full_status', 'check_result'],
399 'last_notification': FloatProp(
401 fill_brok
=['full_status'],
403 'current_notification_number': IntegerProp(
405 fill_brok
=['full_status'],
407 'current_notification_id': IntegerProp(
409 fill_brok
=['full_status'],
411 'check_flapping_recovery_notification': BoolProp(
413 fill_brok
=['full_status'],
415 'scheduled_downtime_depth': IntegerProp(
417 fill_brok
=['full_status'],
419 'pending_flex_downtime': IntegerProp(
421 fill_brok
=['full_status'],
423 'timeout': IntegerProp(
425 fill_brok
=['full_status', 'check_result'],
427 'start_time': IntegerProp(
429 fill_brok
=['full_status', 'check_result'],
431 'end_time': IntegerProp(
433 fill_brok
=['full_status', 'check_result'],
435 'early_timeout': IntegerProp(
437 fill_brok
=['full_status', 'check_result'],
439 'return_code': IntegerProp(
441 fill_brok
=['full_status', 'check_result'],
443 'perf_data': StringProp(
445 fill_brok
=['full_status', 'check_result'],
447 'last_perf_data': StringProp(
450 'customs': StringProp(default
={}, fill_brok
=['full_status']),
452 'got_default_realm' : BoolProp(default
=False),
454 # use for having all contacts we have notified
455 'notified_contacts': StringProp(
458 'in_scheduled_downtime': BoolProp(
461 'in_scheduled_downtime_during_last_check': BoolProp(
465 # put here checks and notif raised
466 'actions': StringProp(
468 # and here broks raised
472 # For knowing with which elements we are in relation
474 # childs are the hosts that have US as parent, so
476 'childs': StringProp(
477 brok_transformation
=to_hostnames_list
,
479 fill_brok
=['full_status']),
480 # Here it's the elements we are depending on
481 # so our parents as network relation, or a host
482 # we are depending in a hostdependency
483 # or even if we are businesss based.
484 'parent_dependencies' : StringProp(
485 brok_transformation
=to_svc_hst_distinct_lists
,
487 fill_brok
=['full_status']),
488 # Here are the elements that depend on us. So it's the exact
489 # opposite of parent_dependencies
490 'child_dependencies': StringProp(
491 brok_transformation
=to_svc_hst_distinct_lists
,
493 fill_brok
=['full_status']),
495 # All errors and warnings raised during the configuration parsing
496 # and that will be raised as real warnings/errors during is_correct
497 'configuration_warnings': StringProp(default
=[]),
498 'configuration_errors': StringProp(default
=[]),
500 ### Problem/impact part
501 'is_problem': StringProp(
503 fill_brok
=['full_status']),
504 'is_impact': StringProp(
506 fill_brok
=['full_status']),
507 # the save value of our criticity for "problems"
508 'my_own_criticity': IntegerProp(default
=-1),
510 # list of problems that make us an impact
511 'source_problems': StringProp(
512 brok_transformation
=to_svc_hst_distinct_lists
,
514 fill_brok
=['full_status']),
516 # list of the impact I'm the cause of
517 'impacts': StringProp(
518 brok_transformation
=to_svc_hst_distinct_lists
,
520 fill_brok
=['full_status']),
522 # keep a trace of the old state before being an impact
523 'state_before_impact': StringProp(default
='PENDING'),
524 # keep a trace of the old state id before being an impact
525 'state_id_before_impact': StringProp(default
=0),
526 # if the state change, we know so we do not revert it
527 'state_changed_since_impact': StringProp(default
=False),
529 #BUSINESS CORRELATOR PART
530 # Say if we are business based rule or not
531 'got_business_rule' : BoolProp(default
=False, fill_brok
=['full_status']),
532 # Our Dependency node for the business rule
533 'business_rule' : StringProp(default
=None),
535 # Manage the unkown/unreach during hard state
536 # From now its not really used
537 'in_hard_unknown_reach_phase' : BoolProp(default
=False, retention
=True),
538 'was_in_hard_unknown_reach_phase' : BoolProp(default
=False, retention
=True),
539 'state_before_hard_unknown_reach_phase' : StringProp(default
='UP', retention
=True),
542 # Hosts macros and prop that give the information
543 # the prop can be callable or not
544 macros
= {'HOSTNAME' : 'host_name',
545 'HOSTDISPLAYNAME' : 'display_name',
546 'HOSTALIAS' : 'alias',
547 'HOSTADDRESS' : 'address',
548 'HOSTSTATE' : 'state',
549 'HOSTSTATEID' : 'state_id',
550 'LASTHOSTSTATE' : 'last_state',
551 'LASTHOSTSTATEID' : 'last_state_id',
552 'HOSTSTATETYPE' : 'state_type',
553 'HOSTATTEMPT' : 'attempt',
554 'MAXHOSTATTEMPTS' : 'max_check_attempts',
555 'HOSTEVENTID' : 'current_event_id',
556 'LASTHOSTEVENTID' : 'last_event_id',
557 'HOSTPROBLEMID' : 'current_problem_id',
558 'LASTHOSTPROBLEMID' : 'last_problem_id',
559 'HOSTLATENCY' : 'latency',
560 'HOSTEXECUTIONTIME' : 'execution_time',
561 'HOSTDURATION' : 'get_duration',
562 'HOSTDURATIONSEC' : 'get_duration_sec',
563 'HOSTDOWNTIME' : 'get_downtime',
564 'HOSTPERCENTCHANGE' : 'percent_state_change',
565 'HOSTGROUPNAME' : 'get_groupname',
566 'HOSTGROUPNAMES' : 'get_groupnames',
567 'LASTHOSTCHECK' : 'last_chk',
568 'LASTHOSTSTATECHANGE' : 'last_state_change',
569 'LASTHOSTUP' : 'last_time_up',
570 'LASTHOSTDOWN' : 'last_time_down',
571 'LASTHOSTUNREACHABLE' : 'last_time_unreachable',
572 'HOSTOUTPUT' : 'output',
573 'LONGHOSTOUTPUT' : 'long_output',
574 'HOSTPERFDATA' : 'perf_data',
575 'LASTHOSTPERFDATA' : 'last_perf_data',
576 'HOSTCHECKCOMMAND' : 'get_check_command',
577 'HOSTACKAUTHOR' : 'get_ack_author_name',
578 'HOSTACKAUTHORNAME' : 'get_ack_author_name',
579 'HOSTACKAUTHORALIAS' : 'get_ack_author_name',
580 'HOSTACKCOMMENT' : 'get_ack_comment',
581 'HOSTACTIONURL' : 'action_url',
582 'HOSTNOTESURL' : 'notes_url',
583 'HOSTNOTES' : 'notes',
584 'TOTALHOSTSERVICES' : 'get_total_services',
585 'TOTALHOSTSERVICESOK' : 'get_total_services_ok',
586 'TOTALHOSTSERVICESWARNING' : 'get_total_services_warning',
587 'TOTALHOSTSERVICESUNKNOWN' : 'get_total_services_unknown',
588 'TOTALHOSTSERVICESCRITICAL' : 'get_total_services_critical'
592 # This tab is used to transform old parameters name into new ones
593 # so from Nagios2 format, to Nagios3 ones
595 'normal_check_interval' : 'check_interval',
596 'retry_check_interval' : 'retry_interval'
604 # Call by picle for data-ify the host
605 # we do a dict because list are too dangerous for
606 # retention save and co :( even if it's more
608 # The setstate function do the inverse
609 def __getstate__(self
):
611 # id is not in *_properties
612 res
= {'id' : self
.id}
613 for prop
in cls
.properties
:
614 if hasattr(self
, prop
):
615 res
[prop
] = getattr(self
, prop
)
616 for prop
in cls
.running_properties
:
617 if hasattr(self
, prop
):
618 res
[prop
] = getattr(self
, prop
)
622 # Inversed funtion of getstate
623 def __setstate__(self
, state
):
625 self
.id = state
['id']
626 for prop
in cls
.properties
:
628 setattr(self
, prop
, state
[prop
])
629 for prop
in cls
.running_properties
:
631 setattr(self
, prop
, state
[prop
])
635 # Fill adresse with host_name if not already set
def fill_predictive_missing_parameters(self):
    """Default ``address`` and ``alias`` to ``host_name`` when they are unset.

    Nothing is done when the object has no ``host_name`` attribute, and an
    attribute that already exists is never overwritten.
    """
    if not hasattr(self, 'host_name'):
        return
    for attr_name in ('address', 'alias'):
        if not hasattr(self, attr_name):
            setattr(self, attr_name, self.host_name)
644 # Check is required prop are set:
645 # contacts OR contactgroups is need
646 def is_correct(self
):
647 state
= True #guilty or not? :)
650 special_properties
= ['contacts', 'contact_groups', 'check_period', \
651 'notification_interval', 'check_period']
652 for prop
in cls
.properties
:
653 if prop
not in special_properties
:
654 if not hasattr(self
, prop
) and cls
.properties
[prop
].required
:
655 logger
.log("%s : I do not have %s" % (self
.get_name(), prop
))
656 state
= False #Bad boy...
658 # Raised all previously saw errors like unknown contacts and co
659 if self
.configuration_errors
!= []:
661 for err
in self
.configuration_errors
:
664 # Ok now we manage special cases...
665 if not hasattr(self
, 'contacts') and not hasattr(self
, 'contact_groups') and self
.notifications_enabled
== True:
666 logger
.log("%s : I do not have contacts nor contact_groups" % self
.get_name())
669 if getattr(self
, 'check_command', None) is None:
670 logger
.log("%s : I've got no check_command" % self
.get_name())
672 # Ok got a command, but maybe it's invalid
674 if not self
.check_command
.is_valid():
675 logger
.log("%s : my check_command %s is invalid" % (self
.get_name(), self
.check_command
.command
))
677 if self
.got_business_rule
:
678 if not self
.business_rule
.is_valid():
679 logger
.log("%s : my business rule is invalid" % (self
.get_name(),))
680 for bperror
in self
.business_rule
.configuration_errors
:
681 logger
.log("%s : %s" % (self
.get_name(), bperror
))
684 if not hasattr(self
, 'notification_interval') and self
.notifications_enabled
== True:
685 logger
.log("%s : I've got no notification_interval but I've got notifications enabled" % self
.get_name())
688 # If active check is enabled with a check_interval!=0, we must have a check_period
689 if ( getattr(self
, 'active_checks_enabled', False)
690 and getattr(self
, 'check_period', None) is None
691 and getattr(self
, 'check_interval', 1) != 0 ):
692 logger
.log("%s : My check_period is not correct" % self
.get_name())
695 if getattr(self
, 'realm', None) is None:
696 logger
.log("%s : My realm is not correct" % self
.get_name())
698 if not hasattr(self
, 'check_period'):
699 self
.check_period
= None
700 if hasattr(self
, 'host_name'):
701 for c
in cls
.illegal_object_name_chars
:
702 if c
in self
.host_name
:
703 logger
.log("%s : My host_name got the caracter %s that is not allowed." % (self
.get_name(), c
))
708 # Search in my service if I've got the service
709 def find_service_by_name(self
, service_description
):
710 for s
in self
.services
:
711 if s
.service_description
== service_description
:
def get_total_services(self):
    """Return the number of entries in ``self.services``, as a string."""
    service_count = len(self.services)
    return str(service_count)
def get_total_services_ok(self):
    """Return, as a string, how many services of this host have state_id 0."""
    ok_count = sum(1 for svc in self.services if svc.state_id == 0)
    return str(ok_count)
def get_total_services_warning(self):
    """Return, as a string, how many services of this host have state_id 1."""
    warning_count = sum(1 for svc in self.services if svc.state_id == 1)
    return str(warning_count)
def get_total_services_critical(self):
    """Return, as a string, how many services of this host have state_id 2."""
    critical_count = sum(1 for svc in self.services if svc.state_id == 2)
    return str(critical_count)
def get_total_services_unknown(self):
    """Return, as a string, how many services of this host have state_id 3."""
    unknown_count = sum(1 for svc in self.services if svc.state_id == 3)
    return str(unknown_count)
737 def get_ack_author_name(self
):
738 if self
.acknowledgement
== None:
740 return self
.acknowledgement
.author
743 def get_ack_comment(self
):
744 if self
.acknowledgement
== None:
746 return self
.acknowledgement
.comment
def get_check_command(self):
    """Return whatever ``get_name()`` of this host's ``check_command`` gives."""
    command = self.check_command
    return command.get_name()
753 # For get a nice name
755 if not self
.is_tpl():
756 return self
.host_name
761 # For debugin purpose only
def get_dbg_name(self):
    """Return the raw ``host_name`` attribute (debugging helper)."""
    debug_name = self.host_name
    return debug_name
766 # Say if we got the other in one of your dep list
767 def is_linked_with_host(self
, other
):
768 for (h
, status
, type, timeperiod
, inherits_parent
) in self
.act_depend_of
:
774 # Delete all links in the act_depend_of list of self and other
775 def del_host_act_dependancy(self
, other
):
777 # First we remove in my list
778 for (h
, status
, type, timeperiod
, inherits_parent
) in self
.act_depend_of
:
780 to_del
.append( (h
, status
, type, timeperiod
, inherits_parent
))
782 self
.act_depend_of
.remove(t
)
784 #And now in the father part
786 for (h
, status
, type, timeperiod
, inherits_parent
) in other
.act_depend_of_me
:
788 to_del
.append( (h
, status
, type, timeperiod
, inherits_parent
) )
790 other
.act_depend_of_me
.remove(t
)
793 # Add a dependancy for action event handler, notification, etc)
794 # and add ourself in it's dep list
def add_host_act_dependancy(self, h, status, timeperiod, inherits_parent):
    """Register an action ('logic_dep') dependency of this host on ``h``.

    A (h, status, 'logic_dep', timeperiod, inherits_parent) tuple is appended
    to our ``act_depend_of`` list, the mirrored tuple (with ``self`` in first
    position) is appended to ``h.act_depend_of_me``, and finally
    ``h.register_son_in_parent_child_dependencies(self)`` is called.
    """
    dep_kind = 'logic_dep'
    # Entry stored on our side: the host we depend on comes first
    mine = (h, status, dep_kind, timeperiod, inherits_parent)
    # Entry stored on the other side: we are the one depending on it
    theirs = (self, status, dep_kind, timeperiod, inherits_parent)

    self.act_depend_of.append(mine)
    h.act_depend_of_me.append(theirs)

    # Keep the parent/child dependency lists in sync as well
    h.register_son_in_parent_child_dependencies(self)
805 # Register the dependancy between 2 service for action (notification etc)
806 # but based on a BUSINESS rule, so on fact:
807 # ERP depend on database, so we fill just database.act_depend_of_me
808 # because we will want ERP mails to go on! So call this
809 # on the database service with the srv=ERP service
def add_business_rule_act_dependancy(self, h, status, timeperiod, inherits_parent):
    """Register ``h`` as depending on us through a business rule.

    Only our ``act_depend_of_me`` list receives an entry, of the form
    (h, status, 'business_dep', timeperiod, inherits_parent); our own
    ``act_depend_of`` list is deliberately left alone. Afterwards
    ``self.register_son_in_parent_child_dependencies(h)`` is called.
    """
    entry = (h, status, 'business_dep', timeperiod, inherits_parent)
    # We only record that h will be impacted by us
    self.act_depend_of_me.append(entry)

    # Keep the parent/child dependency lists in sync as well
    self.register_son_in_parent_child_dependencies(h)
822 # Add a dependancy for check (so before launch)
def add_host_chk_dependancy(self, h, status, timeperiod, inherits_parent):
    """Register a check ('logic_dep') dependency of this host on ``h``.

    A (h, status, 'logic_dep', timeperiod, inherits_parent) tuple is appended
    to our ``chk_depend_of`` list, the mirrored tuple (with ``self`` in first
    position) is appended to ``h.chk_depend_of_me``, and finally
    ``h.register_son_in_parent_child_dependencies(self)`` is called.
    """
    dep_kind = 'logic_dep'
    # Entry stored on our side: the host we depend on comes first
    mine = (h, status, dep_kind, timeperiod, inherits_parent)
    # Entry stored on the other side: we are the one depending on it
    theirs = (self, status, dep_kind, timeperiod, inherits_parent)

    self.chk_depend_of.append(mine)
    h.chk_depend_of_me.append(theirs)

    # Here self depends on h: fill the parent/child lists used for broks
    h.register_son_in_parent_child_dependencies(self)
834 # Add one of our service to services (at linkify)
def add_service_link(self, service):
    """Append ``service`` to this host's ``services`` list."""
    linked_services = self.services
    linked_services.append(service)
839 # Set unreachable : all our parents are down!
840 # We have a special state, but state was already set, we just need to
841 # update it. We are no DOWN, we are UNREACHABLE and
842 # got a state id is 2
843 def set_unreachable(self
):
846 self
.state
= 'UNREACHABLE'
847 self
.last_time_unreachable
= int(now
)
850 # We just go an impact, so we go unreachable
851 # But only if we enable this stte change in the conf
852 def set_impact_state(self
):
854 if cls
.enable_problem_impacts_states_change
:
855 # Keep a trace of the old state (problem came back before
857 self
.state_before_impact
= self
.state
858 self
.state_id_before_impact
= self
.state_id
859 # This flag will know if we overide the impact state
860 self
.state_changed_since_impact
= False
861 self
.state
= 'UNREACHABLE'#exit code UNDETERMINED
865 # Ok, we are no more an impact, if no news checks
866 # overide the impact state, we came back to old
868 # And only if impact state change is set in configuration
869 def unset_impact_state(self
):
871 if cls
.enable_problem_impacts_states_change
and not self
.state_changed_since_impact
:
872 self
.state
= self
.state_before_impact
873 self
.state_id
= self
.state_id_before_impact
876 # set the state in UP, DOWN, or UNDETERMINED
877 # with the status of a check. Also update last_state
878 def set_state_from_exit_status(self
, status
):
880 self
.last_state_update
= now
882 # we should put in last_state the good last state:
883 # if not just change the state by an problem/impact
884 # we can take current state. But if it's the case, the
885 # real old state is self.state_before_impact (it's teh TRUE
887 # And only if we enable the impact state change
889 if cls
.enable_problem_impacts_states_change
and self
.is_impact
and not self
.state_changed_since_impact
:
890 self
.last_state
= self
.state_before_impact
892 self
.last_state
= self
.state
897 self
.last_time_up
= int(self
.last_state_update
)
899 elif status
in (1, 2, 3):
902 self
.last_time_down
= int(self
.last_state_update
)
905 self
.state
= 'DOWN'#exit code UNDETERMINED
907 self
.last_time_down
= int(self
.last_state_update
)
909 if state_code
in self
.flap_detection_options
:
910 self
.add_flapping_change(self
.state
!= self
.last_state
)
911 if self
.state
!= self
.last_state
:
912 self
.last_state_change
= self
.last_state_update
913 self
.duration_sec
= now
- self
.last_state_change
916 # See if status is status. Can be low of high format (o/UP, d/DOWN, ...)
917 def is_state(self
, status
):
918 if status
== self
.state
:
921 elif status
== 'o' and self
.state
== 'UP':
923 elif status
== 'd' and self
.state
== 'DOWN':
925 elif status
== 'u' and self
.state
== 'UNREACHABLE':
930 # The last time when the state was not UP
931 def last_time_non_ok_or_up(self
):
932 if self
.last_time_down
> self
.last_time_up
:
933 last_time_non_up
= self
.last_time_down
936 return last_time_non_up
939 # Add a log entry with a HOST ALERT like:
940 # HOST ALERT: server;DOWN;HARD;1;I don't know what to say...
def raise_alert_log_entry(self):
    """Send a HOST ALERT line to the logger.

    The line is built from get_name(), state, state_type, attempt and
    output, for instance:
    HOST ALERT: server;DOWN;HARD;1;I don't know what to say...
    """
    fields = (self.get_name(), self.state, self.state_type, self.attempt, self.output)
    message = 'HOST ALERT: %s;%s;%s;%d;%s' % fields
    logger.log(message)
945 # Add a log entry with a Freshness alert like:
946 # Warning: The results of host 'Server' are stale by 0d 0h 0m 58s (threshold=0d 1h 0m 0s).
947 # I'm forcing an immediate check of the host.
def raise_freshness_log_entry(self, t_stale_by, t_threshold):
    """Send a freshness warning for this host to the logger.

    ``t_stale_by`` and ``t_threshold`` are both rendered through
    format_t_into_dhms_format() before being inserted in the message.
    """
    host_name = self.get_name()
    stale_txt = format_t_into_dhms_format(t_stale_by)
    threshold_txt = format_t_into_dhms_format(t_threshold)
    message = "Warning: The results of host '%s' are stale by %s (threshold=%s). I'm forcing an immediate check of the host." % (
        host_name, stale_txt, threshold_txt)
    logger.log(message)
953 # Raise a log entry with a Notification alert like
954 # HOST NOTIFICATION: superadmin;server;UP;notify-by-rss;no output
955 def raise_notification_log_entry(self
, n
):
957 command
= n
.command_call
958 if n
.type in ('DOWNTIMESTART', 'DOWNTIMEEND', 'CUSTOM', 'ACKNOWLEDGEMENT', 'FLAPPINGSTART', 'FLAPPINGSTOP', 'FLAPPINGDISABLED'):
959 state
= '%s (%s)' % (n
.type, self
.state
)
962 if self
.__class
__.log_notifications
:
963 logger
.log("HOST NOTIFICATION: %s;%s;%s;%s;%s" % (contact
.get_name(), self
.get_name(), state
, \
964 command
.get_name(), self
.output
))
966 # Raise a log entry with a Eventhandler alert like
967 # HOST NOTIFICATION: superadmin;server;UP;notify-by-rss;no output
968 def raise_event_handler_log_entry(self
, command
):
969 if self
.__class
__.log_event_handlers
:
970 logger
.log("HOST EVENT HANDLER: %s;%s;%s;%s;%s" % (self
.get_name(), self
.state
, self
.state_type
, self
.attempt
, \
974 #Raise a log entry with FLAPPING START alert like
975 #HOST FLAPPING ALERT: server;STARTED; Host appears to have started flapping (50.6% change >= 50.0% threshold)
def raise_flapping_start_log_entry(self, change_ratio, threshold):
    """Send a HOST FLAPPING ALERT ... STARTED line to the logger.

    ``change_ratio`` and ``threshold`` are numbers; each is rendered with one
    decimal and followed by a literal percent sign, for instance:
    HOST FLAPPING ALERT: server;STARTED; Host appears to have started flapping (50.6% change >= 50.0% threshold)
    """
    # A literal '%' must be written '%%' in a %-format string. The previous
    # "%.1f% change >= %.1% threshold" made "% c" a %c conversion, which
    # cannot format the float threshold.
    message = ("HOST FLAPPING ALERT: %s;STARTED; Host appears to have started flapping "
               "(%.1f%% change >= %.1f%% threshold)"
               % (self.get_name(), change_ratio, threshold))
    logger.log(message)
981 #Raise a log entry with FLAPPING STOP alert like
982 #HOST FLAPPING ALERT: server;STOPPED; host appears to have stopped flapping (23.0% change < 25.0% threshold)
def raise_flapping_stop_log_entry(self, change_ratio, threshold):
    """Send a HOST FLAPPING ALERT ... STOPPED line to the logger.

    ``change_ratio`` and ``threshold`` are numbers; each is rendered with one
    decimal and followed by a literal percent sign, for instance:
    HOST FLAPPING ALERT: server;STOPPED; Host appears to have stopped flapping (23.0% change < 25.0% threshold)
    """
    # A literal '%' must be written '%%' in a %-format string. The previous
    # "%.1f% change < %.1% threshold" made "% c" a %c conversion, which
    # cannot format the float threshold.
    message = ("HOST FLAPPING ALERT: %s;STOPPED; Host appears to have stopped flapping "
               "(%.1f%% change < %.1f%% threshold)"
               % (self.get_name(), change_ratio, threshold))
    logger.log(message)
988 #If there is no valid time for next check, raise a log entry
989 def raise_no_next_check_log_entry(self
):
990 logger
.log("Warning : I cannot schedule the check for the host '%s' because there is not future valid time" % \
993 #Raise a log entry when a downtime begins
994 #HOST DOWNTIME ALERT: test_host_0;STARTED; Host has entered a period of scheduled downtime
995 def raise_enter_downtime_log_entry(self
):
996 logger
.log("HOST DOWNTIME ALERT: %s;STARTED; Host has entered a period of scheduled downtime" % \
1000 #Raise a log entry when a downtime has finished
1001 #HOST DOWNTIME ALERT: test_host_0;STOPPED; Host has exited from a period of scheduled downtime
1002 def raise_exit_downtime_log_entry(self
):
1003 logger
.log("HOST DOWNTIME ALERT: %s;STOPPED; Host has exited from a period of scheduled downtime" % \
1007 #Raise a log entry when a downtime prematurely ends
1008 #HOST DOWNTIME ALERT: test_host_0;CANCELLED; Scheduled downtime for host has been cancelled.
1009 def raise_cancel_downtime_log_entry(self
):
1010 logger
.log("HOST DOWNTIME ALERT: %s;CANCELLED; Scheduled downtime for host has been cancelled." % \
1015 #Launch if check is waitconsume==first time
1016 #and if c.status is in self.stalking_options
1017 def manage_stalking(self
, c
):
1019 if c
.status
== 'waitconsume':
1020 if c
.exit_status
== 0 and 'o' in self
.stalking_options
:
1022 elif c
.exit_status
== 1 and 'd' in self
.stalking_options
:
1024 elif c
.exit_status
== 2 and 'd' in self
.stalking_options
:
1026 elif c
.exit_status
== 3 and 'u' in self
.stalking_options
:
1028 if c
.output
!= self
.output
:
1031 logger
.log("Stalking %s : %s" % (self
.get_name(), self
.output
))
1034 #fill act_depend_of with my parents (so network dep)
1035 #and say parents they impact me, no timeperiod and folow parents of course
def fill_parents_dependancie(self):
    """Turn every parent of this host into a 'network_dep' action dependency.

    For each entry of ``self.parents`` that is not None:
    - a (parent, ['d', 'u', 's', 'f'], 'network_dep', None, True) tuple is
      appended to our ``act_depend_of`` list (no timeperiod, parents are
      inherited);
    - ``parent.register_child(self)`` is called;
    - ``parent.register_son_in_parent_child_dependencies(self)`` is called.
    """
    for parent in self.parents:
        if parent is None:
            continue
        # A fresh status list is built for every parent
        network_dep = (parent, ['d', 'u', 's', 'f'], 'network_dep', None, True)
        self.act_depend_of.append(network_dep)

        # Make ourselves known in the parent's own lists
        parent.register_child(self)

        # And fill the parent/child dependencies used for broks
        parent.register_son_in_parent_child_dependencies(self)
1049 # Register a child in our lists
def register_child(self, child):
    """Record ``child`` in our two child-related lists.

    ``childs`` only receives the object itself, while ``act_depend_of_me``
    receives the full (child, ['d', 'u', 's', 'f'], 'network_dep', None, True)
    dependency entry.
    """
    # Plain reference list
    self.childs.append(child)
    # Full dependency entry
    network_dep = (child, ['d', 'u', 's', 'f'], 'network_dep', None, True)
    self.act_depend_of_me.append(network_dep)
1058 #Give data for checks's macros
1059 def get_data_for_checks(self
):
1062 #Give data for event handler's macro
1063 def get_data_for_event_handler(self
):
1066 #Give data for notifications'n macros
def get_data_for_notifications(self, contact, n):
    """Return the list [self, contact, n] used for notification macros."""
    macro_sources = [self, contact, n]
    return macro_sources
1071 #See if the notification is launchable (time is OK and contact is OK too)
def notification_is_blocked_by_contact(self, n, contact):
    """Tell whether ``contact`` refuses the notification ``n`` for this host.

    The contact is asked through want_host_notification(), which receives our
    last_chk, our state, the notification type and our criticity; the
    negation of its answer is returned.
    """
    wanted = contact.want_host_notification(
        self.last_chk, self.state, n.type, self.criticity)
    return not wanted
def get_duration_sec(self):
    """Return ``duration_sec`` converted to an int, as a string."""
    whole_seconds = int(self.duration_sec)
    return str(whole_seconds)
def get_duration(self):
    """Return ``duration_sec`` formatted as 'HHh MMm SSs'.

    Each field is zero-padded to two digits; hours are not capped at 24.
    """
    total_minutes, seconds = divmod(self.duration_sec, 60)
    hours, minutes = divmod(total_minutes, 60)
    parts = (hours, minutes, seconds)
    return "%02dh %02dm %02ds" % parts
1087 #Check if a notification for this host is suppressed at this time
1088 #This is a check at the host level. Do not look at contacts here
1089 def notification_is_blocked_by_item(self
, type, t_wished
= None):
1090 if t_wished
== None:
1091 t_wished
= time
.time()
1094 # forced notification -> false
1095 # custom notification -> false
1097 # Block if notifications are program-wide disabled
1098 if not self
.enable_notifications
:
1101 # Does the notification period allow sending out this notification?
1102 if not self
.notification_period
.is_time_valid(t_wished
):
1105 # Block if notifications are disabled for this host
1106 if not self
.notifications_enabled
:
1109 # Block if the current status is in the notification_options d,u,r,f,s
1110 if 'n' in self
.notification_options
:
1113 if type in ('PROBLEM', 'RECOVERY'):
1114 if self
.state
== 'DOWN' and not 'd' in self
.notification_options
:
1116 if self
.state
== 'UP' and not 'r' in self
.notification_options
:
1118 if self
.state
== 'UNREACHABLE' and not 'u' in self
.notification_options
:
1120 if (type in ('FLAPPINGSTART', 'FLAPPINGSTOP', 'FLAPPINGDISABLED')
1121 and not 'f' in self
.notification_options
):
1123 if (type in ('DOWNTIMESTART', 'DOWNTIMEEND', 'DOWNTIMECANCELLED')
1124 and not 's' in self
.notification_options
):
1127 # Acknowledgements make no sense when the status is ok/up
1128 if type == 'ACKNOWLEDGEMENT':
1129 if self
.state
== self
.ok_up
:
1133 if type in ('FLAPPINGSTART', 'FLAPPINGSTOP', 'FLAPPINGDISABLED'):
1134 # todo block if not notify_on_flapping
1135 if self
.scheduled_downtime_depth
> 0:
1138 # When in deep downtime, only allow end-of-downtime notifications
1139 # In depth 1 the downtime just started and can be notified
1140 if self
.scheduled_downtime_depth
> 1 and not type in ('DOWNTIMEEND', 'DOWNTIMECANCELLED'):
1143 # Block if in a scheduled downtime and a problem arises
1144 if self
.scheduled_downtime_depth
> 0 and type in ('PROBLEM', 'RECOVERY'):
1147 # Block if the status is SOFT
1148 if self
.state_type
== 'SOFT' and type == 'PROBLEM':
1151 # Block if the problem has already been acknowledged
1152 if self
.problem_has_been_acknowledged
and type != 'ACKNOWLEDGEMENT':
1156 if self
.is_flapping
:
1162 #Get a oc*p command if item has obsess_over_*
1163 #command. It must be enabled locally and globally
1164 def get_obsessive_compulsive_processor_command(self
):
1165 cls
= self
.__class
__
1166 if not cls
.obsess_over
or not self
.obsess_over_host
:
1170 data
= self
.get_data_for_event_handler()
1171 cmd
= m
.resolve_command(cls
.ochp_command
, data
)
1172 e
= EventHandler(cmd
, timeout
=cls
.ochp_timeout
)
1174 #ok we can put it in our temp action queue
1175 self
.actions
.append(e
)
# NOTE(review): the class statement that owns these two attributes is not
# visible in this chunk -- confirm they belong to the hosts container class.
name_property = "host_name"  # attribute used for the search by name
inner_class = Host  # used to know what kind of object is in items
1185 #prepare_for_conf_sending to flatten some properties
def prepare_for_sending(self):
    """Call ``prepare_for_conf_sending()`` on every host of the container."""
    for h in self:
        h.prepare_for_conf_sending()
1191 #Create link between elements:
1192 #hosts -> timeperiods
1193 #hosts -> hosts (parents, etc)
1194 #hosts -> commands (check_command)
def linkify(self, timeperiods=None, commands=None, contacts=None,
            realms=None, resultmodulations=None, escalations=None,
            hostgroups=None):
    """Run every linking step for the hosts, in a fixed order.

    Each argument is handed over untouched to the matching
    ``linkify_*`` helper: timeperiods (notification, check and
    maintenance periods), parent hosts, hostgroups, commands (check
    command and event handler), contacts, realms, result modulations
    and escalations.
    """
    for period_property in ('notification_period', 'check_period',
                            'maintenance_period'):
        self.linkify_with_timeperiods(timeperiods, period_property)
    self.linkify_h_by_h()
    self.linkify_h_by_hg(hostgroups)
    for command_property in ('check_command', 'event_handler'):
        self.linkify_one_command_with_commands(commands, command_property)

    self.linkify_with_contacts(contacts)
    self.linkify_h_by_realms(realms)
    self.linkify_with_resultmodulations(resultmodulations)
    #WARNING: not all escalations are linked here
    #(just the escalation here, not serviceesca or hostesca).
    #These last ones will be linked in the escalations linkify.
    self.linkify_with_escalations(escalations)
#Fill address with host_name if not set
def fill_predictive_missing_parameters(self):
    """Call ``fill_predictive_missing_parameters()`` on every host."""
    for h in self:
        h.fill_predictive_missing_parameters()
1220 #Link host with hosts (parents)
def linkify_h_by_h(self):
    """Replace the parent names of every host by the parent objects.

    Each name found in ``h.parents`` is stripped and looked up with
    ``self.find_by_name()``. Names that resolve are kept as objects;
    a name that does not resolve is dropped and an error message is
    appended to ``self.configuration_errors``.
    """
    for h in self:
        #The new member list
        new_parents = []
        for parent in h.parents:
            parent = parent.strip()
            p = self.find_by_name(parent)
            if p is not None:
                new_parents.append(p)
            else:
                err = "Error : the parent '%s' on host '%s' is unknown!" % (parent, h.get_name())
                self.configuration_errors.append(err)
        #print "Me,", h.host_name, "define my parents", new_parents
        #We found the objects, we replace the names
        h.parents = new_parents
1239 #Link with realms and set a default realm if none
def linkify_h_by_realms(self, realms):
    """Replace the realm name of every host by the realm object.

    The default realm is the last realm of ``realms`` whose ``default``
    attribute is true; a message is printed when there is none.

    For each host:
    * a realm name that resolves through ``realms.find_by_name()`` is
      replaced by the realm object;
    * a realm name that does not resolve adds an error to the host's
      ``configuration_errors`` and resets ``h.realm`` to None;
    * a host without realm gets the default realm and has
      ``got_default_realm`` set to True.

    NOTE(review): the branch structure was restored from a chunk in
    which several statements were missing -- confirm against history.
    """
    default_realm = None
    for r in realms:
        if getattr(r, 'default', False):
            default_realm = r
    if default_realm is None:
        # Single-argument print(): same output on Python 2 and Python 3
        print("Error : there is no default realm defined!")
    for h in self:
        #print h.get_name(), h.realm
        if h.realm is not None:
            p = realms.find_by_name(h.realm.strip())
            if p is not None:
                h.realm = p
                print("Host %s is in the realm %s" % (h.get_name(), p.get_name()))
            else:
                err = "Error : the host %s got a invalid realm (%s)!" % (h.get_name(), h.realm)
                h.configuration_errors.append(err)
                h.realm = None
        else:
            #print "Notice : applying default realm %s to host %s" % (default_realm.get_name(), h.get_name())
            h.realm = default_realm
            h.got_default_realm = True
1264 #We look for hostgroups property in hosts and
def linkify_h_by_hg(self, hostgroups):
    """Replace the hostgroup names of every host by hostgroup objects.

    For every non-template host, the comma separated ``hostgroups``
    string (when present and not empty) is split, each name is stripped
    and looked up with ``hostgroups.find_by_name()``. Known groups are
    collected; an unknown one adds an error to the host's
    ``configuration_errors``. ``h.hostgroups`` is then replaced by the
    collected list, which is empty when the host declared no group.

    NOTE(review): the template guard and the loops were restored from
    a chunk in which several statements were missing -- confirm.
    """
    #Hostgroups property needs to be filled to get the information
    #self.apply_partial_inheritance('hostgroups')
    #self.apply_partial_inheritance('contact_groups')

    #Register host in the hostgroups
    for h in self:
        if h.is_tpl():
            continue
        new_hostgroups = []
        if hasattr(h, 'hostgroups') and h.hostgroups != '':
            for hg_name in h.hostgroups.split(','):
                hg_name = hg_name.strip()
                hg = hostgroups.find_by_name(hg_name)
                if hg is not None:
                    new_hostgroups.append(hg)
                else:
                    err = "Error : the hostgroup '%s' of the host '%s' is unknown" % (hg_name, h.host_name)
                    h.configuration_errors.append(err)
        h.hostgroups = new_hostgroups
1289 #It's used to change old Nagios2 names to
def old_properties_names_to_new(self):
    """Call ``old_properties_names_to_new()`` on every host."""
    for h in self:
        h.old_properties_names_to_new()
1297 #We look for hostgroups property in hosts and
def explode(self, hostgroups, contactgroups):
    """Register hosts in their hostgroups, then explode contact groups.

    Every host that is not a template and has a ``host_name`` is added,
    through ``hostgroups.add_member()``, to each group named in its
    comma separated ``hostgroups`` string (names are stripped).
    ``self.explode_contact_groups_into_contacts(contactgroups)`` is
    called afterwards.
    """
    #Hostgroups property needs to be filled to get the information
    #self.apply_partial_inheritance('hostgroups')
    #self.apply_partial_inheritance('contact_groups')

    #Register host in the hostgroups
    for h in self:
        if not h.is_tpl() and hasattr(h, 'host_name'):
            hname = h.host_name
            if hasattr(h, 'hostgroups'):
                for hg in h.hostgroups.split(','):
                    hostgroups.add_member(hname, hg.strip())

    #items::explode_contact_groups_into_contacts
    #take all contacts from our contact_groups into our contact property
    self.explode_contact_groups_into_contacts(contactgroups)
#Create dependencies:
#Dependencies at the host level: host parent
def apply_dependancies(self):
    """Call ``fill_parents_dependancie()`` on every host."""
    for h in self:
        h.fill_parents_dependancie()
#Parent graph: used to quickly find relations between all hosts, and loops
#return True if there is no loop
def no_loop_in_parents(self):
    """Check the parent/child relations of the hosts for loops.

    A Graph is filled with an edge from each parent to its child and
    ``loop_check()`` is asked for the hosts that are part of a loop.
    One error line is logged per such host.

    NOTE(review): node registration, the result flag and the return
    statement were restored from a chunk in which they were missing;
    True is presumably returned when no loop exists -- confirm.
    """
    #Ok, we say "from now, no loop :) "
    r = True

    #Create parent graph
    parents = Graph()

    #With all hosts as nodes
    for h in self:
        if h is not None:
            parents.add_node(h)

    #And now fill the edges
    for h in self:
        for p in h.parents:
            if p is not None:
                parents.add_edge(p, h)

    #Now get the list of all hosts in a loop
    host_in_loops = parents.loop_check()

    #and raise errors about it
    for h in host_in_loops:
        logger.log("Error: The host '%s' is part of a circular parent/child chain!" % h.get_name())
        r = False

    return r
#Return a list of the host_name of the hosts
#that got the template with name=tpl_name
def find_hosts_that_use_template(self, tpl_name):
    """Return the host_name of every host that uses a given template.

    The template is the last item for which ``is_tpl()`` is true and
    whose ``name`` equals ``tpl_name``. An empty list is returned when
    no such template exists. Otherwise the result holds the
    ``host_name`` of each item that lists the template in its
    ``templates`` and has a ``host_name`` attribute.

    NOTE(review): the result list, the loops and the returns were
    restored from a chunk in which they were missing -- confirm.
    """
    #first find the template
    tpl = None
    for h in self:
        #Look for the template with the good name
        if h.is_tpl() and hasattr(h, 'name') and h.name == tpl_name:
            tpl = h

    #If we find no one, we return nothing (easy case:) )
    if tpl is None:
        return []

    #Ok, we found the tpl
    return [h.host_name for h in self
            if tpl in h.templates and hasattr(h, 'host_name')]
1379 # Will create all business tree for the
def create_business_rules(self, hosts, services):
    """Call ``create_business_rules(hosts, services)`` on every host."""
    for h in self:
        h.create_business_rules(hosts, services)
1386 # Will link all business service/host with theirs
1387 # dep for problem/impact link
def create_business_rules_dependencies(self):
    """Call ``create_business_rules_dependencies()`` on every host."""
    for h in self:
        h.create_business_rules_dependencies()