2 #Copyright (C) 2009-2010 :
3 # Gabes Jean, naparuba@gmail.com
4 # Gerhard Lausser, Gerhard.Lausser@consol.de
6 #This file is part of Shinken.
8 #Shinken is free software: you can redistribute it and/or modify
9 #it under the terms of the GNU Affero General Public License as published by
10 #the Free Software Foundation, either version 3 of the License, or
11 #(at your option) any later version.
13 #Shinken is distributed in the hope that it will be useful,
14 #but WITHOUT ANY WARRANTY; without even the implied warranty of
15 #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 #GNU Affero General Public License for more details.
18 #You should have received a copy of the GNU Affero General Public License
19 #along with Shinken. If not, see <http://www.gnu.org/licenses/>.
""" This is the main class for the Host. In fact it's mainly
about the configuration part. For the running part, it's better
to look at the schedulingitem class that manages all the
scheduling/consume check smart things :)
"""
import re  # for keys generator
import time

from shinken.autoslots import AutoSlots
from shinken.item import Items
from shinken.schedulingitem import SchedulingItem
from shinken.util import (to_int, to_float, to_char, to_split, to_bool,
                          format_t_into_dhms_format, to_hostnames_list,
                          get_obj_name, to_svc_hst_distinct_lists,
                          to_list_string_of_names, expand_with_macros)
from shinken.property import (UnusedProp, BoolProp, IntegerProp, FloatProp,
                              CharProp, StringProp, ListProp)
from shinken.graph import Graph
from shinken.macroresolver import MacroResolver
from shinken.eventhandler import EventHandler
from shinken.log import logger
40 class Host(SchedulingItem
):
41 #AutoSlots create the __slots__ with properties and
42 #running_properties names
43 __metaclass__
= AutoSlots
45 id = 1 #0 is reserved for host (primary node for parents)
50 # properties defined by configuration
51 # *required : is required in conf
52 # *default : default value if no set in conf
53 # *pythonize : function to call when transfort string to python object
54 # *fill_brok : if set, send to broker. there are two categories: full_status for initial and update status, check_result for check results
55 # *no_slots : do not take this property for __slots__
56 # Only for the inital call
57 # conf_send_preparation : if set, will pass the property to this function. It's used to "flatten"
58 # some dangerous properties like realms that are too 'linked' to be send like that.
59 # brok_transformation : if set, will call the function with the value of the property
60 # the major times it will be to flatten the data (like realm_name instead of the realm object).
62 'host_name': StringProp(fill_brok
=['full_status', 'check_result', 'next_schedule']),
63 'alias': StringProp(fill_brok
=['full_status']),
64 'display_name': StringProp(
66 fill_brok
=['full_status']),
67 'address': StringProp(fill_brok
=['full_status']),
69 brok_transformation
=to_hostnames_list
,
71 fill_brok
=['full_status']),
72 'hostgroups': StringProp(
73 brok_transformation
=to_list_string_of_names
,
75 fill_brok
=['full_status']),
76 'check_command': StringProp(
77 default
='_internal_host_up',
78 fill_brok
=['full_status']),
79 'initial_state': CharProp(
81 fill_brok
=['full_status']),
82 'max_check_attempts': IntegerProp(fill_brok
=['full_status']),
83 'check_interval': IntegerProp(
85 fill_brok
=['full_status']),
86 'retry_interval': IntegerProp(
88 fill_brok
=['full_status']),
89 'active_checks_enabled': BoolProp(
91 fill_brok
=['full_status']),
92 'passive_checks_enabled': BoolProp(
94 fill_brok
=['full_status']),
95 'check_period': StringProp(fill_brok
=['full_status']),
96 'obsess_over_host': BoolProp(
98 fill_brok
=['full_status']),
99 'check_freshness': BoolProp(
101 fill_brok
=['full_status']),
102 'freshness_threshold': IntegerProp(
104 fill_brok
=['full_status']),
105 'event_handler': StringProp(
107 fill_brok
=['full_status']),
108 'event_handler_enabled': BoolProp(
110 fill_brok
=['full_status']),
111 'low_flap_threshold': IntegerProp(
113 fill_brok
=['full_status']),
114 'high_flap_threshold': IntegerProp(
116 fill_brok
=['full_status']),
117 'flap_detection_enabled': BoolProp(
119 fill_brok
=['full_status']),
120 'flap_detection_options': ListProp(
122 fill_brok
=['full_status']),
123 'process_perf_data': BoolProp(
125 fill_brok
=['full_status']),
126 'retain_status_information': BoolProp(
128 fill_brok
=['full_status']),
129 'retain_nonstatus_information': BoolProp(
131 fill_brok
=['full_status']),
132 'contacts': StringProp(
134 fill_brok
=['full_status']),
135 'contact_groups': StringProp(
137 fill_brok
=['full_status']),
138 'notification_interval': IntegerProp(
140 fill_brok
=['full_status']),
141 'first_notification_delay': IntegerProp(
143 fill_brok
=['full_status']),
144 'notification_period': StringProp(fill_brok
=['full_status']),
145 'notification_options': ListProp(
147 fill_brok
=['full_status']),
148 'notifications_enabled': BoolProp(
150 fill_brok
=['full_status']),
151 'stalking_options': ListProp(
153 fill_brok
=['full_status']),
156 fill_brok
=['full_status']),
157 'notes_url': StringProp(
159 fill_brok
=['full_status']),
160 'action_url': StringProp(
162 fill_brok
=['full_status']),
163 'icon_image': StringProp(
165 fill_brok
=['full_status']),
166 'icon_image_alt': StringProp(
168 fill_brok
=['full_status']),
169 'vrml_image': StringProp(
171 fill_brok
=['full_status']),
172 'statusmap_image': StringProp(
174 fill_brok
=['full_status']),
176 # No slots for this 2 because begin property by a number seems bad
178 '2d_coords': StringProp(
180 fill_brok
=['full_status'],
182 '3d_coords': StringProp(
184 fill_brok
=['full_status'],
186 'failure_prediction_enabled': BoolProp(
188 fill_brok
=['full_status']),
191 # 'fill_brok' is ok because in scheduler it's already
192 # a string from conf_send_preparation
195 fill_brok
=['full_status'],
196 conf_send_preparation
=get_obj_name
),
197 'poller_tag': StringProp(default
=None),
199 'resultmodulations': StringProp(default
=''),
200 'escalations': StringProp(
202 fill_brok
=['full_status']),
203 'maintenance_period': StringProp(
205 fill_brok
=['full_status']),
208 'criticity': IntegerProp(
210 fill_brok
=['full_status']),
214 # properties set only for running purpose
215 # retention : save/load this property from retention
216 running_properties
= {
217 'last_chk': IntegerProp(
219 fill_brok
=['full_status', 'check_result'],
221 'next_chk': IntegerProp(
223 fill_brok
=['full_status', 'next_schedule']),
224 'in_checking': BoolProp(
226 fill_brok
=['full_status', 'check_result', 'next_schedule']),
227 'latency': FloatProp(
229 fill_brok
=['full_status', 'check_result'],
231 'attempt': IntegerProp(
233 fill_brok
=['full_status', 'check_result'],
237 fill_brok
=['full_status'],
239 'state_id': IntegerProp(
241 fill_brok
=['full_status', 'check_result'],
243 'state_type': StringProp(
245 fill_brok
=['full_status'],
247 'state_type_id': IntegerProp(
249 fill_brok
=['full_status', 'check_result'],
251 'current_event_id': StringProp(
253 fill_brok
=['full_status', 'check_result'],
255 'last_event_id': IntegerProp(
257 fill_brok
=['full_status', 'check_result'],
259 'last_state': StringProp(
261 fill_brok
=['full_status'],
263 'last_state_id': IntegerProp(
265 fill_brok
=['full_status'],
267 'last_state_change': FloatProp(
269 fill_brok
=['full_status'],
271 'last_hard_state_change': FloatProp(
273 fill_brok
=['full_status'],
275 'last_hard_state': StringProp(
277 fill_brok
=['full_status'],
279 'last_hard_state_id' : IntegerProp(
281 fill_brok
=['full_status'],
283 'last_time_up': IntegerProp(
284 default
=int(time
.time()),
285 fill_brok
=['full_status', 'check_result'],
287 'last_time_down': IntegerProp(
288 default
=int(time
.time()),
289 fill_brok
=['full_status', 'check_result'],
291 'last_time_unreachable': IntegerProp(
292 default
=int(time
.time()),
293 fill_brok
=['full_status', 'check_result'],
295 'duration_sec': IntegerProp(
297 fill_brok
=['full_status'],
299 'output': StringProp(
301 fill_brok
=['full_status', 'check_result'],
303 'long_output': StringProp(
305 fill_brok
=['full_status', 'check_result'],
307 'is_flapping': BoolProp(
309 fill_brok
=['full_status'],
311 'flapping_comment_id': IntegerProp(
313 fill_brok
=['full_status'],
315 # No broks for _depend_of because of to much links to hosts/services
316 # dependencies for actions like notif of event handler, so AFTER check return
317 'act_depend_of': StringProp(default
=[]),
319 # dependencies for checks raise, so BEFORE checks
320 'chk_depend_of': StringProp(default
=[]),
322 # elements that depend of me, so the reverse than just uppper
323 'act_depend_of_me': StringProp(default
=[]),
325 # elements that depend of me
326 'chk_depend_of_me': StringProp(default
=[]),
328 'last_state_update': StringProp(
330 fill_brok
=['full_status'],
333 # no brok ,to much links
334 'services': StringProp(default
=[]),
336 # No broks, it's just internal, and checks have too links
337 'checks_in_progress': StringProp(default
=[]),
339 # No broks, it's just internal, and checks have too links
340 'notifications_in_progress': StringProp(
343 'downtimes': StringProp(
345 fill_brok
=['full_status'],
347 'comments': StringProp(
349 fill_brok
=['full_status'],
351 'flapping_changes': StringProp(
353 fill_brok
=['full_status'],
355 'percent_state_change': FloatProp(
357 fill_brok
=['full_status'],
359 'problem_has_been_acknowledged': BoolProp(
361 fill_brok
=['full_status'],
363 'acknowledgement': StringProp(
366 'acknowledgement_type': IntegerProp(
368 fill_brok
=['full_status', 'check_result'],
370 'check_type': IntegerProp(
372 fill_brok
=['full_status', 'check_result'],
374 'has_been_checked': IntegerProp(
376 fill_brok
=['full_status', 'check_result'],
378 'should_be_scheduled': IntegerProp(
380 fill_brok
=['full_status'],
382 'last_problem_id': IntegerProp(
384 fill_brok
=['full_status', 'check_result'],
386 'current_problem_id': IntegerProp(
388 fill_brok
=['full_status', 'check_result'],
390 'execution_time': FloatProp(
392 fill_brok
=['full_status', 'check_result'],
394 'last_notification': FloatProp(
396 fill_brok
=['full_status'],
398 'current_notification_number': IntegerProp(
400 fill_brok
=['full_status'],
402 'current_notification_id': IntegerProp(
404 fill_brok
=['full_status'],
406 'check_flapping_recovery_notification': BoolProp(
408 fill_brok
=['full_status'],
410 'scheduled_downtime_depth': IntegerProp(
412 fill_brok
=['full_status'],
414 'pending_flex_downtime': IntegerProp(
416 fill_brok
=['full_status'],
418 'timeout': IntegerProp(
420 fill_brok
=['full_status', 'check_result'],
422 'start_time': IntegerProp(
424 fill_brok
=['full_status', 'check_result'],
426 'end_time': IntegerProp(
428 fill_brok
=['full_status', 'check_result'],
430 'early_timeout': IntegerProp(
432 fill_brok
=['full_status', 'check_result'],
434 'return_code': IntegerProp(
436 fill_brok
=['full_status', 'check_result'],
438 'perf_data': StringProp(
440 fill_brok
=['full_status', 'check_result'],
442 'last_perf_data': StringProp(
445 'customs': StringProp(default
={}, fill_brok
=['full_status']),
447 'got_default_realm' : BoolProp(default
=False),
449 # use for having all contacts we have notified
450 'notified_contacts': StringProp(
453 'in_scheduled_downtime': BoolProp(
456 'in_scheduled_downtime_during_last_check': BoolProp(
460 # put here checks and notif raised
461 'actions': StringProp(
463 # and here broks raised
467 # For knowing with which elements we are in relation
469 # childs are the hosts that have US as parent, so
471 'childs': StringProp(
472 brok_transformation
=to_hostnames_list
,
474 fill_brok
=['full_status']),
475 # Here it's the elements we are depending on
476 # so our parents as network relation, or a host
477 # we are depending in a hostdependency
478 # or even if we are businesss based.
479 'parent_dependencies' : StringProp(
480 brok_transformation
=to_svc_hst_distinct_lists
,
482 fill_brok
=['full_status']),
483 # Here it's the guys taht depend on us. So it's the total
484 # oposite of the parent_dependencies
485 'child_dependencies': StringProp(
486 brok_transformation
=to_svc_hst_distinct_lists
,
488 fill_brok
=['full_status']),
490 # All errors and warning raised during the configuration parsing
491 # and taht will raised real warning/errors during the is_correct
492 'configuration_warnings': StringProp(default
=[]),
493 'configuration_errors': StringProp(default
=[]),
495 ### Problem/impact part
496 'is_problem': StringProp(
498 fill_brok
=['full_status']),
499 'is_impact': StringProp(
501 fill_brok
=['full_status']),
502 # the save value of our criticity for "problems"
503 'my_own_criticity': IntegerProp(default
=-1),
505 # list of problems that make us an impact
506 'source_problems': StringProp(
507 brok_transformation
=to_svc_hst_distinct_lists
,
509 fill_brok
=['full_status']),
511 # list of the impact I'm the cause of
512 'impacts': StringProp(
513 brok_transformation
=to_svc_hst_distinct_lists
,
515 fill_brok
=['full_status']),
517 # keep a trace of the old state before being an impact
518 'state_before_impact': StringProp(default
='PENDING'),
519 # keep a trace of the old state id before being an impact
520 'state_id_before_impact': StringProp(default
=0),
521 # if the state change, we know so we do not revert it
522 'state_changed_since_impact': StringProp(default
=False),
524 #BUSINESS CORRELATOR PART
525 # Say if we are business based rule or not
526 'got_business_rule' : BoolProp(default
=False, fill_brok
=['full_status']),
527 # Our Dependency node for the business rule
528 'business_rule' : StringProp(default
=None),
531 # Hosts macros and prop that give the information
532 # the prop can be callable or not
533 macros
= {'HOSTNAME' : 'host_name',
534 'HOSTDISPLAYNAME' : 'display_name',
535 'HOSTALIAS' : 'alias',
536 'HOSTADDRESS' : 'address',
537 'HOSTSTATE' : 'state',
538 'HOSTSTATEID' : 'state_id',
539 'LASTHOSTSTATE' : 'last_state',
540 'LASTHOSTSTATEID' : 'last_state_id',
541 'HOSTSTATETYPE' : 'state_type',
542 'HOSTATTEMPT' : 'attempt',
543 'MAXHOSTATTEMPTS' : 'max_check_attempts',
544 'HOSTEVENTID' : 'current_event_id',
545 'LASTHOSTEVENTID' : 'last_event_id',
546 'HOSTPROBLEMID' : 'current_problem_id',
547 'LASTHOSTPROBLEMID' : 'last_problem_id',
548 'HOSTLATENCY' : 'latency',
549 'HOSTEXECUTIONTIME' : 'execution_time',
550 'HOSTDURATION' : 'get_duration',
551 'HOSTDURATIONSEC' : 'get_duration_sec',
552 'HOSTDOWNTIME' : 'get_downtime',
553 'HOSTPERCENTCHANGE' : 'percent_state_change',
554 'HOSTGROUPNAME' : 'get_groupname',
555 'HOSTGROUPNAMES' : 'get_groupnames',
556 'LASTHOSTCHECK' : 'last_chk',
557 'LASTHOSTSTATECHANGE' : 'last_state_change',
558 'LASTHOSTUP' : 'last_time_up',
559 'LASTHOSTDOWN' : 'last_time_down',
560 'LASTHOSTUNREACHABLE' : 'last_time_unreachable',
561 'HOSTOUTPUT' : 'output',
562 'LONGHOSTOUTPUT' : 'long_output',
563 'HOSTPERFDATA' : 'perf_data',
564 'LASTHOSTPERFDATA' : 'last_perf_data',
565 'HOSTCHECKCOMMAND' : 'get_check_command',
566 'HOSTACKAUTHOR' : 'get_ack_author_name',
567 'HOSTACKAUTHORNAME' : 'get_ack_author_name',
568 'HOSTACKAUTHORALIAS' : 'get_ack_author_name',
569 'HOSTACKCOMMENT' : 'get_ack_comment',
570 'HOSTACTIONURL' : 'action_url',
571 'HOSTNOTESURL' : 'notes_url',
572 'HOSTNOTES' : 'notes',
573 'TOTALHOSTSERVICES' : 'get_total_services',
574 'TOTALHOSTSERVICESOK' : 'get_total_services_ok',
575 'TOTALHOSTSERVICESWARNING' : 'get_total_services_warning',
576 'TOTALHOSTSERVICESUNKNOWN' : 'get_total_services_unknown',
577 'TOTALHOSTSERVICESCRITICAL' : 'get_total_services_critical'
581 # This tab is used to transform old parameters name into new ones
582 # so from Nagios2 format, to Nagios3 ones
584 'normal_check_interval' : 'check_interval',
585 'retry_check_interval' : 'retry_interval'
593 # Call by picle for data-ify the host
594 # we do a dict because list are too dangerous for
595 # retention save and co :( even if it's more
597 # The setstate function do the inverse
def __getstate__(self):
    """Return the picklable state of this host as a plain dict.

    The dict always holds 'id' (which is not part of the property tables)
    plus every name from the class ``properties`` and ``running_properties``
    tables that is actually set on the instance.
    """
    cls = self.__class__
    # id is not in *_properties
    res = {'id': self.id}
    for table in (cls.properties, cls.running_properties):
        for prop in table:
            if hasattr(self, prop):
                res[prop] = getattr(self, prop)
    return res
# Inverse function of __getstate__
def __setstate__(self, state):
    """Restore this host from a dict produced by ``__getstate__``.

    ``state`` must contain 'id'. Any name from the class ``properties`` and
    ``running_properties`` tables that is present in ``state`` is copied
    back onto the instance.
    """
    cls = self.__class__
    self.id = state['id']
    for table in (cls.properties, cls.running_properties):
        for prop in table:
            # __getstate__ only saves attributes that were set, so a
            # property can be absent from the saved state.
            if prop in state:
                setattr(self, prop, state[prop])
# Fill address and alias with host_name if not already set
def fill_predictive_missing_parameters(self):
    """Default ``address`` and ``alias`` to ``host_name`` when they are unset.

    Nothing is done when the host has no ``host_name``.
    """
    if not hasattr(self, 'host_name'):
        return
    for attr in ('address', 'alias'):
        if not hasattr(self, attr):
            setattr(self, attr, self.host_name)
633 # Check is required prop are set:
634 # contacts OR contactgroups is need
def is_correct(self):
    """Check that this host's configuration is usable.

    One message is logged per problem found. Returns True only when no
    problem was detected. As a side effect, ``check_period`` is set to None
    when it was not defined at all.
    """
    state = True  # guilty or not? :)
    cls = self.__class__

    # These properties are validated by the dedicated checks below
    special_properties = ('contacts', 'contact_groups', 'check_period',
                          'notification_interval')
    for prop in cls.properties:
        if prop in special_properties:
            continue
        if not hasattr(self, prop) and cls.properties[prop].required:
            logger.log("%s : I do not have %s" % (self.get_name(), prop))
            state = False  # Bad boy...

    # Raise all previously seen errors like unknown contacts and co
    if self.configuration_errors != []:
        state = False
        for err in self.configuration_errors:
            logger.log(err)

    # Ok now we manage special cases...
    if not hasattr(self, 'contacts') and not hasattr(self, 'contact_groups') and self.notifications_enabled == True:
        logger.log("%s : I do not have contacts nor contact_groups" % self.get_name())
        state = False

    if not hasattr(self, 'check_command') or self.check_command is None:
        logger.log("%s : I've got no check_command" % self.get_name())
        state = False
    # Ok got a command, but maybe it's invalid
    elif not self.check_command.is_valid():
        logger.log("%s : my check_command %s is invalid" % (self.get_name(), self.check_command.command))
        state = False

    if not hasattr(self, 'notification_interval') and self.notifications_enabled == True:
        logger.log("%s : I've got no notification_interval but I've got notifications enabled" % self.get_name())
        state = False

    # If active check is enabled with a check_interval!=0, we must have a check_period
    active = hasattr(self, 'active_checks_enabled') and self.active_checks_enabled
    no_period = not hasattr(self, 'check_period') or self.check_period is None
    has_interval = hasattr(self, 'check_interval') and self.check_interval != 0
    if active and no_period and has_interval:
        logger.log("%s : My check_period is not correct" % self.get_name())
        state = False

    if not hasattr(self, 'realm') or self.realm is None:
        logger.log("%s : My realm is not correct" % self.get_name())
        state = False

    # Make sure the attribute exists for later readers
    if not hasattr(self, 'check_period'):
        self.check_period = None

    if hasattr(self, 'host_name'):
        for c in cls.illegal_object_name_chars:
            if c in self.host_name:
                logger.log("%s : My host_name got the caracter %s that is not allowed." % (self.get_name(), c))
                state = False

    return state
685 # Search in my service if I've got the service
def find_service_by_name(self, service_description):
    """Return the first of our services with this description, or None."""
    for s in self.services:
        if s.service_description == service_description:
            return s
    return None
def get_total_services(self):
    """Return the number of services attached to this host, as a string."""
    total = len(self.services)
    return str(total)
def get_total_services_ok(self):
    """Return, as a string, how many of our services have state_id 0."""
    count = sum(1 for srv in self.services if srv.state_id == 0)
    return str(count)
def get_total_services_warning(self):
    """Return, as a string, how many of our services have state_id 1."""
    count = sum(1 for srv in self.services if srv.state_id == 1)
    return str(count)
def get_total_services_critical(self):
    """Return, as a string, how many of our services have state_id 2."""
    count = sum(1 for srv in self.services if srv.state_id == 2)
    return str(count)
def get_total_services_unknown(self):
    """Return, as a string, how many of our services have state_id 3."""
    count = sum(1 for srv in self.services if srv.state_id == 3)
    return str(count)
def get_ack_author_name(self):
    """Return the author of the current acknowledgement.

    An empty string is returned when there is no acknowledgement.
    """
    ack = self.acknowledgement
    if ack is None:
        return ''
    return ack.author
def get_ack_comment(self):
    """Return the comment of the current acknowledgement.

    An empty string is returned when there is no acknowledgement.
    """
    ack = self.acknowledgement
    if ack is None:
        return ''
    return ack.comment
def get_check_command(self):
    """Return the name reported by this host's check command object."""
    command = self.check_command
    return command.get_name()
730 # For get a nice name
def get_name(self):
    """Return a printable name for this host.

    A real host is identified by ``host_name``.
    """
    if not self.is_tpl():
        return self.host_name
    # NOTE(review): the template branch is not visible in this file;
    # templates are presumed to carry their label in `name` -- confirm.
    return self.name
738 # For debugin purpose only
def get_dbg_name(self):
    """Return ``host_name``; for debugging purposes only."""
    name = self.host_name
    return name
743 # Add a dependancy for action event handler, notification, etc)
744 # and add ourself in it's dep list
def add_host_act_dependancy(self, h, status, timeperiod, inherits_parent):
    """Register an action dependency of this host on ``h``.

    A 'logic_dep' entry pointing at ``h`` is appended to our
    ``act_depend_of`` list, the mirror entry pointing at us is appended to
    ``h.act_depend_of_me``, and ``h`` is asked to register us in its
    parent/child dependency lists.
    """
    kind = 'logic_dep'
    # What I depend on
    self.act_depend_of.append((h, status, kind, timeperiod, inherits_parent))
    # And the reverse link on the other side
    h.act_depend_of_me.append((self, status, kind, timeperiod, inherits_parent))
    # Parent/child dependency lists too
    h.register_son_in_parent_child_dependencies(self)
755 # Register the dependancy between 2 service for action (notification etc)
756 # but based on a BUSINESS rule, so on fact:
757 # ERP depend on database, so we fill just database.act_depend_of_me
758 # because we will want ERP mails to go on! So call this
759 # on the database service with the srv=ERP service
def add_business_rule_act_dependancy(self, h, status, timeperiod, inherits_parent):
    """Register ``h`` as depending on us through a business rule.

    Only our own ``act_depend_of_me`` list gets a 'business_dep' entry;
    ``h`` is not given a matching ``act_depend_of`` entry. ``h`` is also
    registered as our son in the parent/child dependency lists.
    """
    # I only register so that I know h WILL be impacted by me
    entry = (h, status, 'business_dep', timeperiod, inherits_parent)
    self.act_depend_of_me.append(entry)
    # Parent/child dependency lists too
    self.register_son_in_parent_child_dependencies(h)
772 # Add a dependancy for check (so before launch)
def add_host_chk_dependancy(self, h, status, timeperiod, inherits_parent):
    """Register a check dependency of this host on ``h``.

    A 'logic_dep' entry pointing at ``h`` is appended to our
    ``chk_depend_of`` list, the mirror entry pointing at us is appended to
    ``h.chk_depend_of_me``, and ``h`` is asked to register us in its
    parent/child dependency lists.
    """
    kind = 'logic_dep'
    # What I depend on
    self.chk_depend_of.append((h, status, kind, timeperiod, inherits_parent))
    # And the reverse link on the other side
    h.chk_depend_of_me.append((self, status, kind, timeperiod, inherits_parent))
    # Here self depends on h, so h registers us as its son
    h.register_son_in_parent_child_dependencies(self)
784 # Add one of our service to services (at linkify)
def add_service_link(self, service):
    """Append ``service`` to this host's ``services`` list."""
    linked = self.services
    linked.append(service)
789 # Set unreachable : all our parents are down!
790 # We have a special state, but state was already set, we just need to
791 # update it. We are no DOWN, we are UNREACHABLE and
792 # got a state id is 2
def set_unreachable(self):
    """Switch this host to the UNREACHABLE state (state id 2).

    ``last_time_unreachable`` is set to the current time, truncated to
    whole seconds.
    """
    now = time.time()
    self.state_id = 2
    self.state = 'UNREACHABLE'
    self.last_time_unreachable = int(now)
800 # We just go an impact, so we go unreachable
801 # But only if we enable this stte change in the conf
def set_impact_state(self):
    """Go UNREACHABLE because we have just become an impact.

    This only happens when the class-level
    ``enable_problem_impacts_states_change`` option is on. The current state
    and state id are saved first so that ``unset_impact_state`` can restore
    them.
    """
    cls = self.__class__
    if not cls.enable_problem_impacts_states_change:
        return
    # Keep a trace of the old state
    self.state_before_impact = self.state
    self.state_id_before_impact = self.state_id
    # This flag will tell whether a later check overrode the impact state
    self.state_changed_since_impact = False
    self.state = 'UNREACHABLE'  # exit code UNDETERMINED
    self.state_id = 2
815 # Ok, we are no more an impact, if no news checks
816 # overide the impact state, we came back to old
818 # And only if impact state change is set in configuration
def unset_impact_state(self):
    """Restore the state saved before we became an impact.

    Nothing is restored when the class-level
    ``enable_problem_impacts_states_change`` option is off, or when the
    state changed since the impact was set.
    """
    cls = self.__class__
    if cls.enable_problem_impacts_states_change and not self.state_changed_since_impact:
        self.state = self.state_before_impact
        self.state_id = self.state_id_before_impact
826 # set the state in UP, DOWN, or UNDETERMINED
827 # with the status of a check. Also update last_state
def set_state_from_exit_status(self, status):
    """Set the host state from a check exit status and update last_state.

    Exit status 0 gives UP; every other value gives DOWN. The matching
    ``last_time_up`` / ``last_time_down`` timestamp, ``last_state_change``
    and ``duration_sec`` are refreshed, and a flapping change is recorded
    when the new state's code is in ``flap_detection_options``.
    """
    now = time.time()
    self.last_state_update = now

    # We should put the real previous state in last_state. If the current
    # state was only forced because we are an impact, the true old state
    # is state_before_impact. Only relevant when impact state changes are
    # enabled.
    cls = self.__class__
    if cls.enable_problem_impacts_states_change and self.is_impact and not self.state_changed_since_impact:
        self.last_state = self.state_before_impact
    else:
        self.last_state = self.state

    # NOTE(review): state ids (UP=0, DOWN=1) and the state codes 'o'/'d'
    # are not visible in this file. The codes follow the letters used by
    # is_state() and manage_stalking() -- confirm.
    if status == 0:
        self.state = 'UP'
        self.state_id = 0
        self.last_time_up = int(self.last_state_update)
        state_code = 'o'
    else:
        # 1, 2, 3 and any undetermined exit code are all handled as DOWN
        self.state = 'DOWN'
        self.state_id = 1
        self.last_time_down = int(self.last_state_update)
        state_code = 'd'

    if state_code in self.flap_detection_options:
        self.add_flapping_change(self.state != self.last_state)
    if self.state != self.last_state:
        self.last_state_change = self.last_state_update
    self.duration_sec = now - self.last_state_change
866 # See if status is status. Can be low of high format (o/UP, d/DOWN, ...)
def is_state(self, status):
    """Tell whether ``status`` designates our current state.

    ``status`` may be the full state name or its one-letter form:
    'o' for UP, 'd' for DOWN, 'u' for UNREACHABLE.
    """
    if status == self.state:
        return True
    short_forms = (('o', 'UP'), ('d', 'DOWN'), ('u', 'UNREACHABLE'))
    return (status, self.state) in short_forms
880 # The last time when the state was not UP
def last_time_non_ok_or_up(self):
    """Return the last time this host was down, if it is more recent than the last time it was up.

    Otherwise 0 is returned.
    """
    if self.last_time_down > self.last_time_up:
        return self.last_time_down
    # NOTE(review): the fallback value is not visible in this file; 0 is
    # assumed to mean "never" -- confirm against callers.
    return 0
889 # Add a log entry with a HOST ALERT like:
890 # HOST ALERT: server;DOWN;HARD;1;I don't know what to say...
def raise_alert_log_entry(self):
    """Log a HOST ALERT line: name;state;state_type;attempt;output."""
    fields = (self.get_name(), self.state, self.state_type, self.attempt, self.output)
    logger.log('HOST ALERT: %s;%s;%s;%d;%s' % fields)
895 # Add a log entry with a Freshness alert like:
896 # Warning: The results of host 'Server' are stale by 0d 0h 0m 58s (threshold=0d 1h 0m 0s).
897 # I'm forcing an immediate check of the host.
def raise_freshness_log_entry(self, t_stale_by, t_threshold):
    """Log a freshness warning for this host.

    ``t_stale_by`` and ``t_threshold`` are both rendered with
    ``format_t_into_dhms_format`` before being logged.
    """
    stale_by = format_t_into_dhms_format(t_stale_by)
    threshold = format_t_into_dhms_format(t_threshold)
    message = "Warning: The results of host '%s' are stale by %s (threshold=%s). I'm forcing an immediate check of the host." \
        % (self.get_name(), stale_by, threshold)
    logger.log(message)
903 # Raise a log entry with a Notification alert like
904 # HOST NOTIFICATION: superadmin;server;UP;notify-by-rss;no output
def raise_notification_log_entry(self, n):
    """Log a HOST NOTIFICATION line for notification ``n``.

    Format: contact;host;state;command;output. For notification types that
    are not plain problems/recoveries, the state field is written as
    "TYPE (STATE)". Nothing is logged when the class-level
    ``log_notifications`` option is off.
    """
    # NOTE(review): the notified contact is presumed to be carried by the
    # notification itself, like its command_call -- confirm.
    contact = n.contact
    command = n.command_call
    if n.type in ('DOWNTIMESTART', 'DOWNTIMEEND', 'CUSTOM', 'ACKNOWLEDGEMENT', 'FLAPPINGSTART', 'FLAPPINGSTOP', 'FLAPPINGDISABLED'):
        state = '%s (%s)' % (n.type, self.state)
    else:
        state = self.state
    if self.__class__.log_notifications:
        logger.log("HOST NOTIFICATION: %s;%s;%s;%s;%s" % (contact.get_name(), self.get_name(), state,
                                                          command.get_name(), self.output))
916 # Raise a log entry with a Eventhandler alert like
917 # HOST NOTIFICATION: superadmin;server;UP;notify-by-rss;no output
def raise_event_handler_log_entry(self, command):
    """Log a HOST EVENT HANDLER line: host;state;state_type;attempt;command.

    Nothing is logged when the class-level ``log_event_handlers`` option is
    off.
    """
    if self.__class__.log_event_handlers:
        logger.log("HOST EVENT HANDLER: %s;%s;%s;%s;%s" % (self.get_name(), self.state, self.state_type, self.attempt,
                                                           command.get_name()))
924 #Raise a log entry with FLAPPING START alert like
925 #HOST FLAPPING ALERT: server;STARTED; Host appears to have started flapping (50.6% change >= 50.0% threshold)
def raise_flapping_start_log_entry(self, change_ratio, threshold):
    """Log a HOST FLAPPING ALERT line saying that flapping started.

    ``change_ratio`` and ``threshold`` are percentages, printed with one
    decimal.
    """
    # Literal percent signs must be doubled in a %-format string
    logger.log("HOST FLAPPING ALERT: %s;STARTED; Host appears to have started flapping (%.1f%% change >= %.1f%% threshold)" %
               (self.get_name(), change_ratio, threshold))
931 #Raise a log entry with FLAPPING STOP alert like
932 #HOST FLAPPING ALERT: server;STOPPED; host appears to have stopped flapping (23.0% change < 25.0% threshold)
def raise_flapping_stop_log_entry(self, change_ratio, threshold):
    """Log a HOST FLAPPING ALERT line saying that flapping stopped.

    ``change_ratio`` and ``threshold`` are percentages, printed with one
    decimal.
    """
    # Literal percent signs must be doubled in a %-format string
    logger.log("HOST FLAPPING ALERT: %s;STOPPED; Host appears to have stopped flapping (%.1f%% change < %.1f%% threshold)" %
               (self.get_name(), change_ratio, threshold))
938 #If there is no valid time for next check, raise a log entry
def raise_no_next_check_log_entry(self):
    """Log a warning that no future valid time exists for the next check."""
    logger.log("Warning : I cannot schedule the check for the host '%s' because there is not future valid time" %
               self.get_name())
943 #Raise a log entry when a downtime begins
944 #HOST DOWNTIME ALERT: test_host_0;STARTED; Host has entered a period of scheduled downtime
def raise_enter_downtime_log_entry(self):
    """Log a HOST DOWNTIME ALERT line saying a scheduled downtime started."""
    logger.log("HOST DOWNTIME ALERT: %s;STARTED; Host has entered a period of scheduled downtime" %
               self.get_name())
950 #Raise a log entry when a downtime has finished
951 #HOST DOWNTIME ALERT: test_host_0;STOPPED; Host has exited from a period of scheduled downtime
def raise_exit_downtime_log_entry(self):
    """Log a HOST DOWNTIME ALERT line saying a scheduled downtime finished."""
    logger.log("HOST DOWNTIME ALERT: %s;STOPPED; Host has exited from a period of scheduled downtime" %
               self.get_name())
957 #Raise a log entry when a downtime prematurely ends
958 #HOST DOWNTIME ALERT: test_host_0;CANCELLED; Service has entered a period of scheduled downtime
def raise_cancel_downtime_log_entry(self):
    """Log a HOST DOWNTIME ALERT line saying a scheduled downtime was cancelled."""
    logger.log("HOST DOWNTIME ALERT: %s;CANCELLED; Scheduled downtime for host has been cancelled." %
               self.get_name())
965 #Launch if check is waitconsume==first time
966 #and if c.status is in self.stalking_options
def manage_stalking(self, c):
    """Log a "Stalking" line for check ``c`` when stalking applies.

    Only checks in the 'waitconsume' status are considered. The exit status
    is mapped to a stalking letter (0 -> 'o', 1 and 2 -> 'd', 3 -> 'u')
    which must be present in ``stalking_options``.
    """
    need_stalk = False
    if c.status == 'waitconsume':
        letter = {0: 'o', 1: 'd', 2: 'd', 3: 'u'}.get(c.exit_status)
        stalked_state = letter is not None and letter in self.stalking_options
        # NOTE(review): what the output comparison does is not visible in
        # this file. Implemented here as "stalk only when the check output
        # differs from the stored one" -- confirm the intended behaviour.
        need_stalk = stalked_state and c.output != self.output
    if need_stalk:
        logger.log("Stalking %s : %s" % (self.get_name(), self.output))
984 #fill act_depend_of with my parents (so network dep)
985 #and say parents they impact me, no timeperiod and folow parents of course
def fill_parents_dependancie(self):
    """Turn every parent of this host into a network action dependency.

    For each parent that is not None: a 'network_dep' entry (statuses d, u,
    s, f; no timeperiod; inherits_parent True) is appended to our
    ``act_depend_of`` list, then we are registered as a child of the parent
    and in its parent/child dependency lists.
    """
    for parent in self.parents:
        if parent is None:
            continue
        # I add my parent in my list
        dependency = (parent, ['d', 'u', 's', 'f'], 'network_dep', None, True)
        self.act_depend_of.append(dependency)
        # And I register myself in my parent list too
        parent.register_child(self)
        # And add the parent/child dep filling too, for broking
        parent.register_son_in_parent_child_dependencies(self)
999 # Register a child in our lists
def register_child(self, child):
    """Record ``child`` as one of our children.

    The child is appended to ``childs`` (plain reference) and a
    'network_dep' entry for it is appended to ``act_depend_of_me``.
    """
    # Plain pointer list
    self.childs.append(child)
    # Full dependency data
    dependency = (child, ['d', 'u', 's', 'f'], 'network_dep', None, True)
    self.act_depend_of_me.append(dependency)
1008 #Give data for checks's macros
def get_data_for_checks(self):
    """Return the list of objects used to resolve a check's macros: ourself."""
    return [self]
1012 #Give data for event handler's macro
def get_data_for_event_handler(self):
    """Return the list of objects used to resolve an event handler's macros: ourself."""
    return [self]
1016 #Give data for notifications'n macros
def get_data_for_notifications(self, contact, n):
    """Return ``[self, contact, n]``, the data for a notification's macros."""
    data = [self]
    data.extend((contact, n))
    return data
1021 #See if the notification is launchable (time is OK and contact is OK too)
def notification_is_blocked_by_contact(self, n, contact):
    """Return True when ``contact`` does not want notification ``n``.

    The decision is made by ``contact.want_host_notification``, which is
    given our last check time, our state, the notification type and our
    criticity.
    """
    wanted = contact.want_host_notification(self.last_chk, self.state, n.type, self.criticity)
    return not wanted
def get_duration_sec(self):
    """Return ``duration_sec`` truncated to an integer, as a string."""
    seconds = int(self.duration_sec)
    return str(seconds)
def get_duration(self):
    """Return ``duration_sec`` formatted as "HHh MMm SSs"."""
    minutes, seconds = divmod(self.duration_sec, 60)
    hours, minutes = divmod(minutes, 60)
    parts = (hours, minutes, seconds)
    return "%02dh %02dm %02ds" % parts
1037 #Check if a notification for this host is suppressed at this time
1038 #This is a check at the host level. Do not look at contacts here
def notification_is_blocked_by_item(self, type, t_wished = None):
    """Tell whether a notification of this type is suppressed at host level.

    Contacts are not looked at here. ``t_wished`` is the time the
    notification should go out; it defaults to now. Returns True when the
    notification must be blocked, False otherwise.
    """
    if t_wished is None:
        t_wished = time.time()

    # TODO
    # forced notification -> false
    # custom notification -> false

    # Block if notifications are program-wide disabled
    if not self.enable_notifications:
        return True

    # Does the notification period allow sending out this notification?
    if not self.notification_period.is_time_valid(t_wished):
        return True

    # Block if notifications are disabled for this host
    if not self.notifications_enabled:
        return True

    options = self.notification_options

    # 'n' means no notification at all
    if 'n' in options:
        return True

    # Block if the current status is not in the notification_options d,u,r,f,s
    if type in ('PROBLEM', 'RECOVERY'):
        if self.state == 'DOWN' and 'd' not in options:
            return True
        if self.state == 'UP' and 'r' not in options:
            return True
        if self.state == 'UNREACHABLE' and 'u' not in options:
            return True

    flapping_types = ('FLAPPINGSTART', 'FLAPPINGSTOP', 'FLAPPINGDISABLED')
    if type in flapping_types and 'f' not in options:
        return True
    if type in ('DOWNTIMESTART', 'DOWNTIMEEND', 'DOWNTIMECANCELLED') and 's' not in options:
        return True

    # Acknowledgements make no sense when the status is ok/up
    if type == 'ACKNOWLEDGEMENT' and self.state == self.ok_up:
        return True

    # todo block if not notify_on_flapping
    if type in flapping_types and self.scheduled_downtime_depth > 0:
        return True

    # When in deep downtime, only allow end-of-downtime notifications
    # In depth 1 the downtime just started and can be notified
    if self.scheduled_downtime_depth > 1 and type not in ('DOWNTIMEEND', 'DOWNTIMECANCELLED'):
        return True

    # Block if in a scheduled downtime and a problem arises
    if self.scheduled_downtime_depth > 0 and type in ('PROBLEM', 'RECOVERY'):
        return True

    # Block if the status is SOFT
    if self.state_type == 'SOFT' and type == 'PROBLEM':
        return True

    # Block if the problem has already been acknowledged
    if self.problem_has_been_acknowledged and type != 'ACKNOWLEDGEMENT':
        return True

    # Block while the host is flapping
    if self.is_flapping:
        return True

    return False
1112 #Get a oc*p command if item has obsess_over_*
1113 #command. It must be enabled locally and globally
def get_obsessive_compulsive_processor_command(self):
    """Queue the obsessive-compulsive (ochp) command as an event handler.

    Nothing is done unless obsessing is enabled both at class level
    (``obsess_over``) and for this host (``obsess_over_host``). Otherwise
    the class-level ``ochp_command`` is resolved against our event-handler
    data and the resulting EventHandler is appended to ``actions``.
    """
    cls = self.__class__
    if not cls.obsess_over or not self.obsess_over_host:
        return

    m = MacroResolver()
    data = self.get_data_for_event_handler()
    cmd = m.resolve_command(cls.ochp_command, data)
    e = EventHandler(cmd, timeout=cls.ochp_timeout)

    # ok we can put it in our temp action queue
    self.actions.append(e)
# NOTE(review): these two assignments read like class-level attributes of
# the hosts collection, but no enclosing `class` statement is present in
# the source text at this point -- confirm and restore it when re-indenting.
name_property = "host_name"  # attribute used for the search by name
inner_class = Host  # tells what kind of object the items are
def prepare_for_sending(self):
    """Call ``prepare_for_conf_sending()`` on every host held by ``self``.

    The original comment says this is done to flatten some properties;
    the flattening itself lives in the host-level method.
    """
    # NOTE(review): loop header reconstructed -- the source text had lost
    # it and used `h` unbound; iterating the collection itself is assumed.
    for h in self:
        h.prepare_for_conf_sending()
def linkify(self, timeperiods=None, commands=None, contacts=None,
            realms=None, resultmodulations=None, escalations=None,
            hostgroups=None):
    """Create the links between hosts and the other configuration items.

    Runs, in this order:
      * hosts -> timeperiods (notification, check and maintenance periods)
      * hosts -> hosts (parents)
      * hosts -> hostgroups
      * hosts -> commands (check_command, event_handler)
      * hosts -> contacts
      * hosts -> realms
      * hosts -> resultmodulations
      * hosts -> escalations

    Each argument is handed unchanged to the matching ``linkify_*`` helper.
    """
    for prop in ('notification_period', 'check_period',
                 'maintenance_period'):
        self.linkify_with_timeperiods(timeperiods, prop)

    self.linkify_h_by_h()
    self.linkify_h_by_hg(hostgroups)

    for prop in ('check_command', 'event_handler'):
        self.linkify_one_command_with_commands(commands, prop)

    self.linkify_with_contacts(contacts)
    self.linkify_h_by_realms(realms)
    self.linkify_with_resultmodulations(resultmodulations)
    # WARNING: not every escalation is linked here (only the plain
    # escalations, not the serviceescalation/hostescalation ones).
    # Those are linked by the escalations' own linkify.
    self.linkify_with_escalations(escalations)
def fill_predictive_missing_parameters(self):
    """Call ``fill_predictive_missing_parameters()`` on every host.

    Per the original comment the per-host step fills the address from
    host_name when it is not set; that logic lives in the host-level
    method, not here.
    """
    # NOTE(review): loop header reconstructed -- the source text had lost
    # it and used `h` unbound.
    for h in self:
        h.fill_predictive_missing_parameters()
def linkify_h_by_h(self):
    """Link hosts with hosts: replace parent names by host objects.

    For every host, each entry of ``h.parents`` is stripped and looked
    up with ``self.find_by_name()``.  Found hosts are collected, in
    order, into the new ``h.parents`` list.  An unknown name is left out
    and reported in ``self.configuration_errors`` (the collection's
    list, not the host's).
    """
    # NOTE(review): the outer loop, the `parents`/`new_parents` bindings
    # and the found/unknown branch were missing from the source text and
    # are reconstructed from the surviving statements -- confirm.
    for h in self:
        new_parents = []
        for parent in h.parents:
            parent = parent.strip()
            p = self.find_by_name(parent)
            if p is not None:
                new_parents.append(p)
            else:
                err = "Error : the parent '%s' on host '%s' is unknown!" % (parent, h.get_name())
                self.configuration_errors.append(err)
        # We found the objects, so they replace the names.
        h.parents = new_parents
def linkify_h_by_realms(self, realms):
    """Link every host with its realm, applying the default when unset.

    The default realm is the last item of ``realms`` whose ``default``
    attribute is true; if there is none an error line is printed and
    hosts without a realm are left with ``None``.

    For each host:
      * ``h.realm`` is None: it receives the default realm and
        ``h.got_default_realm`` is set to True;
      * otherwise the stripped name is looked up with
        ``realms.find_by_name()``.  A hit replaces the name by the realm
        object; a miss is reported in ``h.configuration_errors``.
    """
    # NOTE(review): both loop headers, the branch structure and the
    # `default_realm = r`, `h.realm = p` and `h.realm = None` assignments
    # were missing from the source text; they are reconstructed from the
    # surviving statements and messages -- confirm.
    default_realm = None
    for r in realms:
        if getattr(r, 'default', False):
            default_realm = r
    if default_realm is None:
        # Single parenthesised argument: same output under the Python 2
        # print statement and the Python 3 print function.
        print("Error : there is no default realm defined!")

    for h in self:
        if h.realm is None:
            h.realm = default_realm
            h.got_default_realm = True
            continue

        p = realms.find_by_name(h.realm.strip())
        if p is not None:
            h.realm = p
            print("Host %s is in the realm %s" % (h.get_name(), p.get_name()))
        else:
            err = "Error : the host %s got a invalid realm (%s)!" % (h.get_name(), h.realm)
            h.configuration_errors.append(err)
            # Presumably the dangling name must not survive linking.
            h.realm = None
def linkify_h_by_hg(self, hostgroups):
    """Replace each host's hostgroup names by hostgroup objects.

    ``h.hostgroups`` is read as a comma-separated string.  Each stripped
    name is looked up with ``hostgroups.find_by_name()``: hits are kept,
    in order; misses are reported in ``h.configuration_errors``.  A host
    with no ``hostgroups`` attribute, or an empty one, ends up with an
    empty list.  Templates are left untouched.
    """
    # The hostgroups property must already be filled in (inherited) for
    # the names to be available here.
    for h in self:
        # NOTE(review): the loop headers, the accumulator and this
        # template guard were missing from the source text.  The guard
        # is inferred from the error message reading h.host_name and
        # from the same check in explode() -- confirm.
        if h.is_tpl():
            continue

        new_hostgroups = []
        names = getattr(h, 'hostgroups', '')
        if names != '':
            for hg_name in names.split(','):
                hg_name = hg_name.strip()
                hg = hostgroups.find_by_name(hg_name)
                if hg is not None:
                    new_hostgroups.append(hg)
                else:
                    err = "Error : the hostgroup '%s' of the host '%s' is unknown" % (hg_name, h.host_name)
                    h.configuration_errors.append(err)
        h.hostgroups = new_hostgroups
def old_properties_names_to_new(self):
    """Call ``old_properties_names_to_new()`` on every host.

    Per the original comment this is used to change old Nagios2
    property names; the renaming itself lives in the host-level method.
    """
    # NOTE(review): loop header reconstructed -- the source text had lost
    # it and used `h` unbound.
    for h in self:
        h.old_properties_names_to_new()
def explode(self, hostgroups, contactgroups):
    """Register hosts in their hostgroups, then explode contact groups.

    Every non-template host that has a ``host_name`` and a
    ``hostgroups`` attribute gets ``hostgroups.add_member(host_name,
    group_name)`` called once per comma-separated, stripped group name.
    Afterwards ``explode_contact_groups_into_contacts(contactgroups)``
    is called on the collection, which (per the original comment) takes
    all contacts from the contact_groups into the contact property.
    """
    # The hostgroups property must already be filled in (inherited) for
    # the names to be available here.
    # NOTE(review): the loop headers and the `hname` binding were missing
    # from the source text and are reconstructed from the surviving
    # add_member() call -- confirm.
    for h in self:
        if h.is_tpl() or not hasattr(h, 'host_name'):
            continue
        hname = h.host_name
        if hasattr(h, 'hostgroups'):
            for hg in h.hostgroups.split(','):
                hostgroups.add_member(hname, hg.strip())

    self.explode_contact_groups_into_contacts(contactgroups)
def apply_dependancies(self):
    """Create the host-level dependencies (host -> parent).

    Calls ``fill_parents_dependancie()`` on every host; the dependency
    logic itself lives in that host-level method.
    """
    # NOTE(review): loop header reconstructed -- the source text had lost
    # it and used `h` unbound.
    for h in self:
        h.fill_parents_dependancie()
def no_loop_in_parents(self):
    """Check the parent relations of all hosts for circular chains.

    Builds a Graph with one edge parent -> host for every non-None
    entry of each host's ``parents`` list, asks it for the hosts that
    are part of a loop (``loop_check()``) and logs one error per such
    host.

    Returns True when no host is in a loop, False otherwise.
    NOTE(review): the previous header comment said "return True if
    there is a loop", which contradicts the method name and the
    surviving "from now, no loop" comment; the return statement itself
    was missing from the source text -- confirm the convention against
    the callers.
    """
    # Ok, we say "from now, no loop".
    no_loop = True

    # Create the parent graph...
    parents = Graph()

    # ...with all hosts as nodes.
    # NOTE(review): node registration was missing from the source text;
    # `add_node` is assumed to be the Graph method for it -- confirm.
    for h in self:
        parents.add_node(h)

    # And now fill the edges.
    for h in self:
        for p in h.parents:
            if p is not None:
                parents.add_edge(p, h)

    # Now get the list of all hosts that are in a loop...
    host_in_loops = parents.loop_check()

    # ...and raise errors about them.
    for h in host_in_loops:
        logger.log("Error: The host '%s' is part of a circular parent/child chain!" % h.get_name())
        no_loop = False

    return no_loop
def find_hosts_that_use_template(self, tpl_name):
    """Return the host_name of every host that uses a given template.

    tpl_name -- value of the ``name`` attribute of the template.

    The template is the last item for which ``is_tpl()`` is true and
    whose ``name`` equals ``tpl_name``.  If there is none, an empty list
    is returned.  Otherwise the result lists, in iteration order, the
    ``host_name`` of each item that has one and whose ``templates``
    contains the template object.
    """
    # NOTE(review): the accumulator, both loop headers and the return
    # statements were missing from the source text and are reconstructed
    # from the surviving conditions and comments -- confirm.

    # First find the template with the right name.
    tpl = None
    for h in self:
        if h.is_tpl() and hasattr(h, 'name') and h.name == tpl_name:
            tpl = h

    # If we found none, we return nothing (easy case).
    if tpl is None:
        return []

    # Ok, we found the template: collect the hosts that use it.
    return [h.host_name for h in self
            if tpl in h.templates and hasattr(h, 'host_name')]
def create_business_rules(self, hosts, services):
    """Call ``create_business_rules(hosts, services)`` on every host.

    Both arguments are passed through unchanged; building the business
    tree itself is done by the host-level method.
    """
    # NOTE(review): loop header reconstructed -- the source text had lost
    # it and used `h` unbound.
    for h in self:
        h.create_business_rules(hosts, services)
def create_business_rules_dependencies(self):
    """Call ``create_business_rules_dependencies()`` on every host.

    Per the original comment this links each business service/host with
    its dependencies for the problem/impact relation; that linking is
    done by the host-level method.
    """
    # NOTE(review): loop header reconstructed -- the source text had lost
    # it and used `h` unbound.
    for h in self:
        h.create_business_rules_dependencies()