2 #Copyright (C) 2009-2010 :
3 # Gabes Jean, naparuba@gmail.com
4 # Gerhard Lausser, Gerhard.Lausser@consol.de
5 # Gregory Starck, g.starck@gmail.com
6 # Hartmut Goebel, h.goebel@goebel-consult.de
8 #This file is part of Shinken.
10 #Shinken is free software: you can redistribute it and/or modify
11 #it under the terms of the GNU Affero General Public License as published by
12 #the Free Software Foundation, either version 3 of the License, or
13 #(at your option) any later version.
15 #Shinken is distributed in the hope that it will be useful,
16 #but WITHOUT ANY WARRANTY; without even the implied warranty of
17 #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 #GNU Affero General Public License for more details.
20 #You should have received a copy of the GNU Affero General Public License
21 #along with Shinken. If not, see <http://www.gnu.org/licenses/>.
""" This is the main class for the Host. In fact it is mainly
about the configuration part. For the running part, it is better
to look at the SchedulingItem class, which manages all the
scheduling and check-consuming logic :)
31 from shinken
.objects
import Items
, SchedulingItem
32 from shinken
.autoslots
import AutoSlots
33 from shinken
.util
import format_t_into_dhms_format
, to_hostnames_list
, get_obj_name
, to_svc_hst_distinct_lists
, to_list_string_of_names
34 from shinken
.property import BoolProp
, IntegerProp
, FloatProp
, CharProp
, StringProp
, ListProp
35 from shinken
.graph
import Graph
36 from shinken
.macroresolver
import MacroResolver
37 from shinken
.eventhandler
import EventHandler
38 from shinken
.log
import logger
41 class Host(SchedulingItem
):
42 #AutoSlots create the __slots__ with properties and
43 #running_properties names
44 __metaclass__
= AutoSlots
46 id = 1 #0 is reserved for host (primary node for parents)
51 # properties defined by configuration
52 # *required : is required in conf
53 # *default : default value if no set in conf
# *pythonize : function to call when transforming a string into a python object
55 # *fill_brok : if set, send to broker. there are two categories: full_status for initial and update status, check_result for check results
56 # *no_slots : do not take this property for __slots__
# Only for the initial call
58 # conf_send_preparation : if set, will pass the property to this function. It's used to "flatten"
59 # some dangerous properties like realms that are too 'linked' to be send like that.
60 # brok_transformation : if set, will call the function with the value of the property
61 # the major times it will be to flatten the data (like realm_name instead of the realm object).
63 'host_name': StringProp(fill_brok
=['full_status', 'check_result', 'next_schedule']),
64 'alias': StringProp(fill_brok
=['full_status']),
65 'display_name': StringProp(default
='none', fill_brok
=['full_status']),
66 'address': StringProp(fill_brok
=['full_status']),
67 'parents': ListProp(brok_transformation
=to_hostnames_list
, default
='', fill_brok
=['full_status']),
68 'hostgroups': StringProp(brok_transformation
=to_list_string_of_names
, default
='', fill_brok
=['full_status']),
69 'check_command': StringProp(default
='_internal_host_up', fill_brok
=['full_status']),
70 'initial_state': CharProp(default
='u', fill_brok
=['full_status']),
71 'max_check_attempts': IntegerProp(fill_brok
=['full_status']),
72 'check_interval': IntegerProp(default
='0', fill_brok
=['full_status']),
73 'retry_interval': IntegerProp(default
='0', fill_brok
=['full_status']),
74 'active_checks_enabled': BoolProp(default
='1', fill_brok
=['full_status']),
75 'passive_checks_enabled': BoolProp(default
='1', fill_brok
=['full_status']),
76 'check_period': StringProp(fill_brok
=['full_status']),
77 'obsess_over_host': BoolProp(default
='0', fill_brok
=['full_status']),
78 'check_freshness': BoolProp(default
='0', fill_brok
=['full_status']),
79 'freshness_threshold': IntegerProp(default
='0', fill_brok
=['full_status']),
80 'event_handler': StringProp(default
='', fill_brok
=['full_status']),
81 'event_handler_enabled': BoolProp(default
='0', fill_brok
=['full_status']),
82 'low_flap_threshold': IntegerProp(default
='25', fill_brok
=['full_status']),
83 'high_flap_threshold': IntegerProp(default
='50', fill_brok
=['full_status']),
84 'flap_detection_enabled': BoolProp(default
='1', fill_brok
=['full_status']),
85 'flap_detection_options': ListProp(default
='o,d,u', fill_brok
=['full_status']),
86 'process_perf_data': BoolProp(default
='1', fill_brok
=['full_status']),
87 'retain_status_information': BoolProp(default
='1', fill_brok
=['full_status']),
88 'retain_nonstatus_information': BoolProp(default
='1', fill_brok
=['full_status']),
89 'contacts': StringProp(default
='', fill_brok
=['full_status']),
90 'contact_groups': StringProp(default
='', fill_brok
=['full_status']),
91 'notification_interval': IntegerProp(default
='60', fill_brok
=['full_status']),
92 'first_notification_delay': IntegerProp(default
='0', fill_brok
=['full_status']),
93 'notification_period': StringProp(fill_brok
=['full_status']),
94 'notification_options': ListProp(default
='d,u,r,f', fill_brok
=['full_status']),
95 'notifications_enabled': BoolProp(default
='1', fill_brok
=['full_status']),
96 'stalking_options': ListProp(default
='', fill_brok
=['full_status']),
97 'notes': StringProp(default
='', fill_brok
=['full_status']),
98 'notes_url': StringProp(default
='', fill_brok
=['full_status']),
99 'action_url': StringProp(default
='', fill_brok
=['full_status']),
100 'icon_image': StringProp(default
='', fill_brok
=['full_status']),
101 'icon_image_alt': StringProp(default
='', fill_brok
=['full_status']),
102 'vrml_image': StringProp(default
='', fill_brok
=['full_status']),
103 'statusmap_image': StringProp(default
='', fill_brok
=['full_status']),
105 # No slots for this 2 because begin property by a number seems bad
107 '2d_coords': StringProp(default
='', fill_brok
=['full_status'], no_slots
=True),
108 '3d_coords': StringProp(default
='', fill_brok
=['full_status'], no_slots
=True),
109 'failure_prediction_enabled': BoolProp(default
='0', fill_brok
=['full_status']),
112 # 'fill_brok' is ok because in scheduler it's already
113 # a string from conf_send_preparation
114 'realm': StringProp(default
=None, fill_brok
=['full_status'], conf_send_preparation
=get_obj_name
),
115 'poller_tag': StringProp(default
='None'),
116 'reactionner_tag': StringProp(default
='None'),
117 'resultmodulations': StringProp(default
=''),
118 'escalations': StringProp(default
='', fill_brok
=['full_status']),
119 'maintenance_period': StringProp(default
='', fill_brok
=['full_status']),
122 'criticity': IntegerProp(default
='3', fill_brok
=['full_status']),
125 # properties set only for running purpose
126 # retention : save/load this property from retention
127 running_properties
= {
128 'last_chk': IntegerProp(default
=0, fill_brok
=['full_status', 'check_result'], retention
=True),
129 'next_chk': IntegerProp(default
=0, fill_brok
=['full_status', 'next_schedule']),
130 'in_checking': BoolProp(default
=False, fill_brok
=['full_status', 'check_result', 'next_schedule']),
131 'latency': FloatProp(default
=0, fill_brok
=['full_status', 'check_result'], retention
=True),
132 'attempt': IntegerProp(default
=0, fill_brok
=['full_status', 'check_result'], retention
=True),
133 'state': StringProp(default
='PENDING', fill_brok
=['full_status'], retention
=True),
134 'state_id': IntegerProp(default
=0, fill_brok
=['full_status', 'check_result'], retention
=True),
135 'state_type': StringProp(default
='HARD', fill_brok
=['full_status'], retention
=True),
136 'state_type_id': IntegerProp(default
=0, fill_brok
=['full_status', 'check_result'], retention
=True),
137 'current_event_id': StringProp(default
=0, fill_brok
=['full_status', 'check_result'], retention
=True),
138 'last_event_id': IntegerProp(default
=0, fill_brok
=['full_status', 'check_result'], retention
=True),
139 'last_state': StringProp(default
='PENDING', fill_brok
=['full_status'], retention
=True),
140 'last_state_id': IntegerProp(default
=0, fill_brok
=['full_status'], retention
=True),
141 'last_state_type' : StringProp(default
='HARD', fill_brok
=['full_status'], retention
=True),
142 'last_state_change': FloatProp(default
=time
.time(), fill_brok
=['full_status'], retention
=True),
143 'last_hard_state_change': FloatProp(default
=time
.time(), fill_brok
=['full_status', 'check_result'], retention
=True),
144 'last_hard_state': StringProp(default
='PENDING', fill_brok
=['full_status'], retention
=True),
145 'last_hard_state_id' : IntegerProp(default
=0, fill_brok
=['full_status'], retention
=True),
146 'last_time_up': IntegerProp(default
=int(time
.time()), fill_brok
=['full_status', 'check_result'], retention
=True),
147 'last_time_down': IntegerProp(default
=int(time
.time()), fill_brok
=['full_status', 'check_result'], retention
=True),
148 'last_time_unreachable': IntegerProp(default
=int(time
.time()), fill_brok
=['full_status', 'check_result'], retention
=True),
149 'duration_sec': IntegerProp(default
=0, fill_brok
=['full_status'], retention
=True),
150 'output': StringProp(default
='', fill_brok
=['full_status', 'check_result'], retention
=True),
151 'long_output': StringProp(default
='', fill_brok
=['full_status', 'check_result'], retention
=True),
152 'is_flapping': BoolProp(default
=False, fill_brok
=['full_status'], retention
=True),
153 'flapping_comment_id': IntegerProp(default
=0, fill_brok
=['full_status'], retention
=True),
154 # No broks for _depend_of because of to much links to hosts/services
155 # dependencies for actions like notif of event handler, so AFTER check return
156 'act_depend_of': StringProp(default
=[]),
158 # dependencies for checks raise, so BEFORE checks
159 'chk_depend_of': StringProp(default
=[]),
161 # elements that depend of me, so the reverse than just uppper
162 'act_depend_of_me': StringProp(default
=[]),
164 # elements that depend of me
165 'chk_depend_of_me': StringProp(default
=[]),
166 'last_state_update': StringProp(default
=time
.time(), fill_brok
=['full_status'], retention
=True),
168 # no brok ,to much links
169 'services': StringProp(default
=[]),
171 # No broks, it's just internal, and checks have too links
172 'checks_in_progress': StringProp(default
=[]),
174 # No broks, it's just internal, and checks have too links
175 'notifications_in_progress': StringProp(default
={}, retention
=True),
176 'downtimes': StringProp(default
=[], fill_brok
=['full_status'], retention
=True),
177 'comments': StringProp(default
=[], fill_brok
=['full_status'], retention
=True),
178 'flapping_changes': StringProp(default
=[], fill_brok
=['full_status'], retention
=True),
179 'percent_state_change': FloatProp(default
=0.0, fill_brok
=['full_status'], retention
=True),
180 'problem_has_been_acknowledged': BoolProp(default
=False, fill_brok
=['full_status'], retention
=True),
181 'acknowledgement': StringProp(default
=None, retention
=True),
182 'acknowledgement_type': IntegerProp(default
=1, fill_brok
=['full_status', 'check_result'], retention
=True),
183 'check_type': IntegerProp(default
=0, fill_brok
=['full_status', 'check_result'], retention
=True),
184 'has_been_checked': IntegerProp(default
=0, fill_brok
=['full_status', 'check_result'], retention
=True),
185 'should_be_scheduled': IntegerProp(default
=1, fill_brok
=['full_status'], retention
=True),
186 'last_problem_id': IntegerProp(default
=0, fill_brok
=['full_status', 'check_result'], retention
=True),
187 'current_problem_id': IntegerProp(default
=0, fill_brok
=['full_status', 'check_result'], retention
=True),
188 'execution_time': FloatProp(default
=0.0, fill_brok
=['full_status', 'check_result'], retention
=True),
189 'last_notification': FloatProp(default
=time
.time(), fill_brok
=['full_status'], retention
=True),
190 'current_notification_number': IntegerProp(default
=0, fill_brok
=['full_status'], retention
=True),
191 'current_notification_id': IntegerProp(default
=0, fill_brok
=['full_status'], retention
=True),
192 'check_flapping_recovery_notification': BoolProp(default
=True, fill_brok
=['full_status'], retention
=True),
193 'scheduled_downtime_depth': IntegerProp(default
=0, fill_brok
=['full_status'], retention
=True),
194 'pending_flex_downtime': IntegerProp(default
=0, fill_brok
=['full_status'], retention
=True),
195 'timeout': IntegerProp(default
=0, fill_brok
=['full_status', 'check_result'], retention
=True),
196 'start_time': IntegerProp(default
=0, fill_brok
=['full_status', 'check_result'], retention
=True),
197 'end_time': IntegerProp(default
=0, fill_brok
=['full_status', 'check_result'], retention
=True),
198 'early_timeout': IntegerProp(default
=0, fill_brok
=['full_status', 'check_result'], retention
=True),
199 'return_code': IntegerProp(default
=0, fill_brok
=['full_status', 'check_result'], retention
=True),
200 'perf_data': StringProp(default
='', fill_brok
=['full_status', 'check_result'], retention
=True),
201 'last_perf_data': StringProp(default
='', retention
=True),
202 'customs': StringProp(default
={}, fill_brok
=['full_status']),
203 'got_default_realm' : BoolProp(default
=False),
205 # use for having all contacts we have notified
206 'notified_contacts': StringProp(default
=set()),
208 'in_scheduled_downtime': BoolProp(default
=False, retention
=True),
209 'in_scheduled_downtime_during_last_check': BoolProp(default
=False, retention
=True),
211 # put here checks and notif raised
212 'actions': StringProp(default
=[]),
213 # and here broks raised
214 'broks': StringProp(default
=[]),
216 # For knowing with which elements we are in relation
218 # childs are the hosts that have US as parent, so
220 'childs': StringProp(brok_transformation
=to_hostnames_list
, default
=[], fill_brok
=['full_status']),
221 # Here it's the elements we are depending on
222 # so our parents as network relation, or a host
223 # we are depending in a hostdependency
224 # or even if we are businesss based.
225 'parent_dependencies' : StringProp(brok_transformation
=to_svc_hst_distinct_lists
, default
=[], fill_brok
=['full_status']),
# Here are the elements that depend on us. So it is the exact
# opposite of parent_dependencies
228 'child_dependencies': StringProp(
229 brok_transformation
=to_svc_hst_distinct_lists
,
231 fill_brok
=['full_status']),
# All errors and warnings raised during the configuration parsing
# and that will raise real warnings/errors during is_correct
235 'configuration_warnings': StringProp(default
=[]),
236 'configuration_errors': StringProp(default
=[]),
238 ### Problem/impact part
239 'is_problem': StringProp(default
=False, fill_brok
=['full_status']),
240 'is_impact': StringProp(default
=False, fill_brok
=['full_status']),
241 # the save value of our criticity for "problems"
242 'my_own_criticity': IntegerProp(default
=-1),
244 # list of problems that make us an impact
245 'source_problems': StringProp(brok_transformation
=to_svc_hst_distinct_lists
, default
=[], fill_brok
=['full_status']),
247 # list of the impact I'm the cause of
248 'impacts': StringProp(brok_transformation
=to_svc_hst_distinct_lists
, default
=[], fill_brok
=['full_status']),
250 # keep a trace of the old state before being an impact
251 'state_before_impact': StringProp(default
='PENDING'),
252 # keep a trace of the old state id before being an impact
253 'state_id_before_impact': StringProp(default
=0),
254 # if the state change, we know so we do not revert it
255 'state_changed_since_impact': StringProp(default
=False),
257 #BUSINESS CORRELATOR PART
258 # Say if we are business based rule or not
259 'got_business_rule' : BoolProp(default
=False, fill_brok
=['full_status']),
260 # Our Dependency node for the business rule
261 'business_rule' : StringProp(default
=None),
263 # Manage the unkown/unreach during hard state
264 # From now its not really used
265 'in_hard_unknown_reach_phase' : BoolProp(default
=False, retention
=True),
266 'was_in_hard_unknown_reach_phase' : BoolProp(default
=False, retention
=True),
267 'state_before_hard_unknown_reach_phase' : StringProp(default
='UP', retention
=True),
270 # Hosts macros and prop that give the information
271 # the prop can be callable or not
273 'HOSTNAME': 'host_name',
274 'HOSTDISPLAYNAME': 'display_name',
275 'HOSTALIAS': 'alias',
276 'HOSTADDRESS': 'address',
277 'HOSTSTATE': 'state',
278 'HOSTSTATEID': 'state_id',
279 'LASTHOSTSTATE': 'last_state',
280 'LASTHOSTSTATEID': 'last_state_id',
281 'HOSTSTATETYPE': 'state_type',
282 'HOSTATTEMPT': 'attempt',
283 'MAXHOSTATTEMPTS': 'max_check_attempts',
284 'HOSTEVENTID': 'current_event_id',
285 'LASTHOSTEVENTID': 'last_event_id',
286 'HOSTPROBLEMID': 'current_problem_id',
287 'LASTHOSTPROBLEMID': 'last_problem_id',
288 'HOSTLATENCY': 'latency',
289 'HOSTEXECUTIONTIME': 'execution_time',
290 'HOSTDURATION': 'get_duration',
291 'HOSTDURATIONSEC': 'get_duration_sec',
292 'HOSTDOWNTIME': 'get_downtime',
293 'HOSTPERCENTCHANGE': 'percent_state_change',
294 'HOSTGROUPNAME': 'get_groupname',
295 'HOSTGROUPNAMES': 'get_groupnames',
296 'LASTHOSTCHECK': 'last_chk',
297 'LASTHOSTSTATECHANGE': 'last_state_change',
298 'LASTHOSTUP': 'last_time_up',
299 'LASTHOSTDOWN': 'last_time_down',
300 'LASTHOSTUNREACHABLE': 'last_time_unreachable',
301 'HOSTOUTPUT': 'output',
302 'LONGHOSTOUTPUT': 'long_output',
303 'HOSTPERFDATA': 'perf_data',
304 'LASTHOSTPERFDATA': 'last_perf_data',
305 'HOSTCHECKCOMMAND': 'get_check_command',
306 'HOSTACKAUTHOR': 'get_ack_author_name',
307 'HOSTACKAUTHORNAME': 'get_ack_author_name',
308 'HOSTACKAUTHORALIAS': 'get_ack_author_name',
309 'HOSTACKCOMMENT': 'get_ack_comment',
310 'HOSTACTIONURL': 'action_url',
311 'HOSTNOTESURL': 'notes_url',
312 'HOSTNOTES': 'notes',
313 'TOTALHOSTSERVICES': 'get_total_services',
314 'TOTALHOSTSERVICESOK': 'get_total_services_ok',
315 'TOTALHOSTSERVICESWARNING': 'get_total_services_warning',
316 'TOTALHOSTSERVICESUNKNOWN': 'get_total_services_unknown',
317 'TOTALHOSTSERVICESCRITICAL': 'get_total_services_critical'
321 # This tab is used to transform old parameters name into new ones
322 # so from Nagios2 format, to Nagios3 ones
324 'normal_check_interval': 'check_interval',
325 'retry_check_interval': 'retry_interval'
333 # Call by picle for data-ify the host
334 # we do a dict because list are too dangerous for
335 # retention save and co :( even if it's more
337 # The setstate function do the inverse
def __getstate__(self):
    """Return a dict snapshot of this host for pickling.

    The dict always holds 'id', plus every name listed in the class
    `properties` and `running_properties` tables that is actually set
    on this instance. `__setstate__` does the inverse.
    """
    # The property tables live on the class, not on the instance
    cls = self.__class__
    # id is not in *_properties
    res = {'id': self.id}
    for table in (cls.properties, cls.running_properties):
        for prop in table:
            # Only store what is really set on the instance
            if hasattr(self, prop):
                res[prop] = getattr(self, prop)
    return res
351 # Inversed funtion of getstate
def __setstate__(self, state):
    """Restore this host from a dict produced by `__getstate__`.

    `state` must contain 'id'. Every other key that matches a name in
    the class `properties` or `running_properties` tables is copied
    back onto the instance.
    """
    # The property tables live on the class, not on the instance
    cls = self.__class__
    self.id = state['id']
    for table in (cls.properties, cls.running_properties):
        for prop in table:
            # __getstate__ skips attributes that were not set, so a
            # property may legitimately be absent from the saved state
            if prop in state:
                setattr(self, prop, state[prop])
364 # Fill adresse with host_name if not already set
def fill_predictive_missing_parameters(self):
    """Default `address` and `alias` to `host_name` when they are unset.

    Nothing is done if the host has no `host_name`.
    """
    if not hasattr(self, 'host_name'):
        return
    for attr in ('address', 'alias'):
        if not hasattr(self, attr):
            setattr(self, attr, self.host_name)
373 # Check is required prop are set:
374 # contacts OR contactgroups is need
375 def is_correct(self
):
376 state
= True #guilty or not? :)
379 special_properties
= ['check_period', 'notification_interval', 'check_period']
380 for prop
, entry
in cls
.properties
.items():
381 if prop
not in special_properties
:
382 if not hasattr(self
, prop
) and entry
.required
:
383 logger
.log("%s : I do not have %s" % (self
.get_name(), prop
))
384 state
= False #Bad boy...
386 # Raised all previously saw errors like unknown contacts and co
387 if self
.configuration_errors
!= []:
389 for err
in self
.configuration_errors
:
392 # Ok now we manage special cases...
393 if self
.notifications_enabled
and self
.contacts
== []:
394 logger
.log("Waring : the host %s do not have contacts nor contact_groups" % self
.get_name())
396 if getattr(self
, 'check_command', None) is None:
397 logger
.log("%s : I've got no check_command" % self
.get_name())
399 # Ok got a command, but maybe it's invalid
401 if not self
.check_command
.is_valid():
402 logger
.log("%s : my check_command %s is invalid" % (self
.get_name(), self
.check_command
.command
))
404 if self
.got_business_rule
:
405 if not self
.business_rule
.is_valid():
406 logger
.log("%s : my business rule is invalid" % (self
.get_name(),))
407 for bperror
in self
.business_rule
.configuration_errors
:
408 logger
.log("%s : %s" % (self
.get_name(), bperror
))
411 if not hasattr(self
, 'notification_interval') and self
.notifications_enabled
== True:
412 logger
.log("%s : I've got no notification_interval but I've got notifications enabled" % self
.get_name())
415 # If active check is enabled with a check_interval!=0, we must have a check_period
416 if ( getattr(self
, 'active_checks_enabled', False)
417 and getattr(self
, 'check_period', None) is None
418 and getattr(self
, 'check_interval', 1) != 0 ):
419 logger
.log("%s : My check_period is not correct" % self
.get_name())
422 if getattr(self
, 'realm', None) is None:
423 logger
.log("%s : My realm is not correct" % self
.get_name())
425 if not hasattr(self
, 'check_period'):
426 self
.check_period
= None
427 if hasattr(self
, 'host_name'):
428 for c
in cls
.illegal_object_name_chars
:
429 if c
in self
.host_name
:
430 logger
.log("%s : My host_name got the caracter %s that is not allowed." % (self
.get_name(), c
))
435 # Search in my service if I've got the service
def find_service_by_name(self, service_description):
    """Return the service of this host named `service_description`.

    Looks through `self.services` and returns the first one whose
    `service_description` equals the argument, or None if there is none.
    """
    for s in self.services:
        if s.service_description == service_description:
            return s
    return None
def get_total_services(self):
    """Return the number of services attached to this host, as a string."""
    total = len(self.services)
    return str(total)
def get_total_services_ok(self):
    """Return, as a string, how many of our services have state_id 0."""
    count = sum(1 for svc in self.services if svc.state_id == 0)
    return str(count)
def get_total_services_warning(self):
    """Return, as a string, how many of our services have state_id 1."""
    count = sum(1 for svc in self.services if svc.state_id == 1)
    return str(count)
def get_total_services_critical(self):
    """Return, as a string, how many of our services have state_id 2."""
    count = sum(1 for svc in self.services if svc.state_id == 2)
    return str(count)
def get_total_services_unknown(self):
    """Return, as a string, how many of our services have state_id 3."""
    count = sum(1 for svc in self.services if svc.state_id == 3)
    return str(count)
def get_ack_author_name(self):
    """Return the author of the current acknowledgement.

    Used as a macro value provider. When the host has no
    acknowledgement, an empty string is returned so the macro
    expands to nothing instead of failing on None.
    """
    if self.acknowledgement is None:
        return ''
    return self.acknowledgement.author
def get_ack_comment(self):
    """Return the comment of the current acknowledgement.

    Used as a macro value provider. When the host has no
    acknowledgement, an empty string is returned so the macro
    expands to nothing instead of failing on None.
    """
    if self.acknowledgement is None:
        return ''
    return self.acknowledgement.comment
def get_check_command(self):
    """Return the name of this host's check command (macro helper)."""
    check_command = self.check_command
    return check_command.get_name()
480 # For get a nice name
482 if not self
.is_tpl():
484 return self
.host_name
485 except AttributeError: # outch, no hostname
490 except AttributeError: # outch, no name for this template
491 return 'UNNAMEDHOSTTEMPLATE'
494 # For debugin purpose only
def get_dbg_name(self):
    """Return the raw `host_name`; meant for debugging output only."""
    debug_name = self.host_name
    return debug_name
499 # Say if we got the other in one of your dep list
def is_linked_with_host(self, other):
    """Tell whether `other` appears in our action dependency list.

    Each entry of `self.act_depend_of` is a tuple whose first item is
    the host we depend on. Returns True if one of them equals `other`,
    False otherwise.
    """
    # Only the first item matters here, so the tuple is not unpacked
    # (this also avoids shadowing the builtin `type`)
    return any(dep[0] == other for dep in self.act_depend_of)
507 # Delete all links in the act_depend_of list of self and other
def del_host_act_dependancy(self, other):
    """Delete every action-dependency link between self and `other`.

    Removes from `self.act_depend_of` the entries pointing at `other`,
    and from `other.act_depend_of_me` the entries pointing back at
    self. Both lists are updated in place.
    """
    # First we remove in my list: drop the entries whose host is `other`
    self.act_depend_of[:] = [dep for dep in self.act_depend_of
                             if not dep[0] == other]

    # And now in the father part: drop the entries whose host is me
    other.act_depend_of_me[:] = [dep for dep in other.act_depend_of_me
                                 if not dep[0] == self]
526 # Add a dependancy for action event handler, notification, etc)
527 # and add ourself in it's dep list
def add_host_act_dependancy(self, h, status, timeperiod, inherits_parent):
    """Register an action dependency of self on host `h`.

    A 'logic_dep' entry is added to our `act_depend_of` list, the
    mirror entry is added to `h.act_depend_of_me`, and `h` is asked to
    register us in its parent/child dependency lists.
    """
    dep_kind = 'logic_dep'
    # Forward link: I depend on h
    forward = (h, status, dep_kind, timeperiod, inherits_parent)
    self.act_depend_of.append(forward)
    # Reverse link: h knows I depend on it
    backward = (self, status, dep_kind, timeperiod, inherits_parent)
    h.act_depend_of_me.append(backward)

    # Keep the parent/child dependency lists in sync too
    h.register_son_in_parent_child_dependencies(self)
538 # Register the dependancy between 2 service for action (notification etc)
539 # but based on a BUSINESS rule, so on fact:
540 # ERP depend on database, so we fill just database.act_depend_of_me
541 # because we will want ERP mails to go on! So call this
542 # on the database service with the srv=ERP service
def add_business_rule_act_dependancy(self, h, status, timeperiod, inherits_parent):
    """Register `h` as depending on self through a business rule.

    Only our own `act_depend_of_me` list is filled (with a
    'business_dep' entry); nothing is appended to any `act_depend_of`
    list. `h` is then registered as our son in the parent/child
    dependency lists.
    """
    # I only register so that I know h WILL be an impact of mine
    business_link = (h, status, 'business_dep', timeperiod, inherits_parent)
    self.act_depend_of_me.append(business_link)

    # Keep the parent/child dependency lists in sync too
    self.register_son_in_parent_child_dependencies(h)
555 # Add a dependancy for check (so before launch)
def add_host_chk_dependancy(self, h, status, timeperiod, inherits_parent):
    """Register a check dependency of self on host `h`.

    A 'logic_dep' entry is added to our `chk_depend_of` list, the
    mirror entry is added to `h.chk_depend_of_me`, and `h` is asked to
    register us in its parent/child dependency lists.
    """
    dep_kind = 'logic_dep'
    # Forward link: I depend on h
    forward = (h, status, dep_kind, timeperiod, inherits_parent)
    self.chk_depend_of.append(forward)
    # Reverse link: h knows I depend on it
    backward = (self, status, dep_kind, timeperiod, inherits_parent)
    h.chk_depend_of_me.append(backward)

    # Fill the parent/child lists as well; here self depends on h
    h.register_son_in_parent_child_dependencies(self)
567 # Add one of our service to services (at linkify)
def add_service_link(self, service):
    """Append `service` to the list of services attached to this host."""
    attached = self.services
    attached.append(service)
572 # Set unreachable : all our parents are down!
573 # We have a special state, but state was already set, we just need to
574 # update it. We are no DOWN, we are UNREACHABLE and
575 # got a state id is 2
def set_unreachable(self):
    """Flag this host as UNREACHABLE (state id 2) as of now.

    Sets `state` and `state_id`, and records the current time,
    truncated to an int, in `last_time_unreachable`.
    """
    now = time.time()
    self.state_id = 2
    self.state = 'UNREACHABLE'
    self.last_time_unreachable = int(now)
583 # We just go an impact, so we go unreachable
584 # But only if we enable this stte change in the conf
def set_impact_state(self):
    """Switch this host to the impact state (UNREACHABLE).

    Does nothing unless the class-level flag
    `enable_problem_impacts_states_change` is set. The previous state
    and state id are saved first so `unset_impact_state` can restore
    them.
    """
    cls = self.__class__
    if cls.enable_problem_impacts_states_change:
        # Keep a trace of the old state so it can be restored later
        self.state_before_impact = self.state
        self.state_id_before_impact = self.state_id
        # This flag tells whether a later check overrode the impact state
        self.state_changed_since_impact = False
        self.state = 'UNREACHABLE'#exit code UNDETERMINED
        # Same id as the one set_unreachable documents for UNREACHABLE
        self.state_id = 2
598 # Ok, we are no more an impact, if no news checks
599 # overide the impact state, we came back to old
601 # And only if impact state change is set in configuration
def unset_impact_state(self):
    """Restore the state saved before this host became an impact.

    Only applies when the class-level flag
    `enable_problem_impacts_states_change` is set and no check has
    changed the state since the impact (`state_changed_since_impact`
    is false).
    """
    cls = self.__class__
    if cls.enable_problem_impacts_states_change and not self.state_changed_since_impact:
        self.state = self.state_before_impact
        self.state_id = self.state_id_before_impact
609 # set the state in UP, DOWN, or UNDETERMINED
610 # with the status of a check. Also update last_state
611 def set_state_from_exit_status(self
, status
):
613 self
.last_state_update
= now
615 # we should put in last_state the good last state:
616 # if not just change the state by an problem/impact
617 # we can take current state. But if it's the case, the
618 # real old state is self.state_before_impact (it's teh TRUE
620 # And only if we enable the impact state change
622 if cls
.enable_problem_impacts_states_change
and self
.is_impact
and not self
.state_changed_since_impact
:
623 self
.last_state
= self
.state_before_impact
625 self
.last_state
= self
.state
630 self
.last_time_up
= int(self
.last_state_update
)
632 elif status
in (1, 2, 3):
635 self
.last_time_down
= int(self
.last_state_update
)
638 self
.state
= 'DOWN'#exit code UNDETERMINED
640 self
.last_time_down
= int(self
.last_state_update
)
642 if state_code
in self
.flap_detection_options
:
643 self
.add_flapping_change(self
.state
!= self
.last_state
)
644 if self
.state
!= self
.last_state
:
645 self
.last_state_change
= self
.last_state_update
646 self
.duration_sec
= now
- self
.last_state_change
649 # See if status is status. Can be low of high format (o/UP, d/DOWN, ...)
def is_state(self, status):
    """Tell whether `status` designates the current state of the host.

    `status` may be the full state name (compared directly with
    `self.state`) or its one-letter form: 'o' for UP, 'd' for DOWN,
    'u' for UNREACHABLE. Returns True on a match, False otherwise.
    """
    if status == self.state:
        return True
    # Now the short, one-letter forms
    elif status == 'o' and self.state == 'UP':
        return True
    elif status == 'd' and self.state == 'DOWN':
        return True
    elif status == 'u' and self.state == 'UNREACHABLE':
        return True
    return False
663 # The last time when the state was not UP
def last_time_non_ok_or_up(self):
    """Return the last time the host was not UP.

    This is `last_time_down` when it is more recent than
    `last_time_up`; otherwise 0 is returned.
    """
    if self.last_time_down > self.last_time_up:
        last_time_non_up = self.last_time_down
    else:
        # NOTE(review): 0 is assumed to be the "no such time" value
        # expected by callers -- confirm against SchedulingItem usage
        last_time_non_up = 0
    return last_time_non_up
672 # Add a log entry with a HOST ALERT like:
673 # HOST ALERT: server;DOWN;HARD;1;I don't know what to say...
def raise_alert_log_entry(self):
    """Log a HOST ALERT line: name;state;state_type;attempt;output."""
    fields = (self.get_name(), self.state, self.state_type, self.attempt, self.output)
    logger.log('HOST ALERT: %s;%s;%s;%d;%s' % fields)
678 # Add a log entry with a Freshness alert like:
679 # Warning: The results of host 'Server' are stale by 0d 0h 0m 58s (threshold=0d 1h 0m 0s).
680 # I'm forcing an immediate check of the host.
def raise_freshness_log_entry(self, t_stale_by, t_threshold):
    """Log a warning that the host results are stale.

    `t_stale_by` and `t_threshold` are both rendered through
    format_t_into_dhms_format before being put in the message.
    """
    host_name = self.get_name()
    stale_by = format_t_into_dhms_format(t_stale_by)
    threshold = format_t_into_dhms_format(t_threshold)
    message = "Warning: The results of host '%s' are stale by %s (threshold=%s). I'm forcing an immediate check of the host." % (host_name, stale_by, threshold)
    logger.log(message)
686 # Raise a log entry with a Notification alert like
687 # HOST NOTIFICATION: superadmin;server;UP;notify-by-rss;no output
688 def raise_notification_log_entry(self
, n
):
690 command
= n
.command_call
691 if n
.type in ('DOWNTIMESTART', 'DOWNTIMEEND', 'CUSTOM', 'ACKNOWLEDGEMENT', 'FLAPPINGSTART', 'FLAPPINGSTOP', 'FLAPPINGDISABLED'):
692 state
= '%s (%s)' % (n
.type, self
.state
)
695 if self
.__class
__.log_notifications
:
696 logger
.log("HOST NOTIFICATION: %s;%s;%s;%s;%s" % (contact
.get_name(), self
.get_name(), state
, \
697 command
.get_name(), self
.output
))
699 # Raise a log entry with a Eventhandler alert like
700 # HOST NOTIFICATION: superadmin;server;UP;notify-by-rss;no output
def raise_event_handler_log_entry(self, command):
    """Log a HOST EVENT HANDLER line for `command`.

    Nothing is logged unless the class-level flag `log_event_handlers`
    is set. The line holds the host name, state, state type, attempt
    and the command name.
    """
    if self.__class__.log_event_handlers:
        # NOTE(review): the command is assumed to expose get_name(), as
        # the command call used for notification log entries does
        logger.log("HOST EVENT HANDLER: %s;%s;%s;%s;%s" % (self.get_name(), self.state, self.state_type, self.attempt, \
                                                           command.get_name()))
707 #Raise a log entry with FLAPPING START alert like
708 #HOST FLAPPING ALERT: server;STARTED; Host appears to have started flapping (50.6% change >= 50.0% threshold)
def raise_flapping_start_log_entry(self, change_ratio, threshold):
    """Log a HOST FLAPPING ALERT ... STARTED line.

    `change_ratio` and `threshold` are percentages, printed with one
    decimal, e.g. "(50.6% change >= 50.0% threshold)".
    """
    # A literal percent sign must be written %% in a %-format string;
    # the threshold also needs a real %.1f conversion
    logger.log("HOST FLAPPING ALERT: %s;STARTED; Host appears to have started flapping (%.1f%% change >= %.1f%% threshold)" % \
                   (self.get_name(), change_ratio, threshold))
714 #Raise a log entry with FLAPPING STOP alert like
715 #HOST FLAPPING ALERT: server;STOPPED; host appears to have stopped flapping (23.0% change < 25.0% threshold)
def raise_flapping_stop_log_entry(self, change_ratio, threshold):
    """Log a HOST FLAPPING ALERT ... STOPPED line.

    `change_ratio` and `threshold` are percentages, printed with one
    decimal, e.g. "(23.0% change < 25.0% threshold)".
    """
    # A literal percent sign must be written %% in a %-format string;
    # the threshold also needs a real %.1f conversion
    logger.log("HOST FLAPPING ALERT: %s;STOPPED; Host appears to have stopped flapping (%.1f%% change < %.1f%% threshold)" % \
                   (self.get_name(), change_ratio, threshold))
721 #If there is no valid time for next check, raise a log entry
def raise_no_next_check_log_entry(self):
    """Log a warning that no future valid time exists for the next check."""
    logger.log("Warning : I cannot schedule the check for the host '%s' because there is not future valid time" % \
                   (self.get_name(),))
726 #Raise a log entry when a downtime begins
727 #HOST DOWNTIME ALERT: test_host_0;STARTED; Host has entered a period of scheduled downtime
def raise_enter_downtime_log_entry(self):
    """Log a HOST DOWNTIME ALERT ... STARTED line for this host."""
    logger.log("HOST DOWNTIME ALERT: %s;STARTED; Host has entered a period of scheduled downtime" % \
                   (self.get_name(),))
733 #Raise a log entry when a downtime has finished
734 #HOST DOWNTIME ALERT: test_host_0;STOPPED; Host has exited from a period of scheduled downtime
def raise_exit_downtime_log_entry(self):
    """Log a HOST DOWNTIME ALERT ... STOPPED line for this host."""
    logger.log("HOST DOWNTIME ALERT: %s;STOPPED; Host has exited from a period of scheduled downtime" % \
                   (self.get_name(),))
740 #Raise a log entry when a downtime prematurely ends
741 #HOST DOWNTIME ALERT: test_host_0;CANCELLED; Scheduled downtime for host has been cancelled.
742 def raise_cancel_downtime_log_entry(self
):
743 logger
.log("HOST DOWNTIME ALERT: %s;CANCELLED; Scheduled downtime for host has been cancelled." % \
748 #Launch if check is waitconsume==first time
749 #and if c.status is in self.stalking_options
750 def manage_stalking(self
, c
):
752 if c
.status
== 'waitconsume':
753 if c
.exit_status
== 0 and 'o' in self
.stalking_options
:
755 elif c
.exit_status
== 1 and 'd' in self
.stalking_options
:
757 elif c
.exit_status
== 2 and 'd' in self
.stalking_options
:
759 elif c
.exit_status
== 3 and 'u' in self
.stalking_options
:
761 if c
.output
!= self
.output
:
764 logger
.log("Stalking %s : %s" % (self
.get_name(), self
.output
))
767 #fill act_depend_of with my parents (so network dep)
768 #and tell the parents that they impact me; no timeperiod, and follow parents of course
def fill_parents_dependancie(self):
    """Turn every parent of this host into a network dependency.

    For each entry of self.parents that is not None:
      * a 'network_dep' tuple is appended to self.act_depend_of,
      * the parent is asked to register this host as a child,
      * the parent is asked to record this host in its parent/child
        dependencies (used for broking).
    """
    for network_parent in self.parents:
        # Entries that are None are skipped
        if network_parent is None:
            continue

        # I add my parent in my list
        dependency = (network_parent, ['d', 'u', 's', 'f'], 'network_dep', None, True)
        self.act_depend_of.append(dependency)

        # And I register myself in my parent list too
        network_parent.register_child(self)

        # And add the parent/child dep filling too, for broking
        network_parent.register_son_in_parent_child_dependencies(self)
782 # Register a child in our lists
def register_child(self, child):
    """Register a child in our lists.

    Two lists are updated:
      * self.childs gets the bare reference (it is just for broking),
      * self.act_depend_of_me gets a 'network_dep' tuple carrying all
        the data, which is useful for the 'running' part.
    """
    reverse_dependency = (child, ['d', 'u', 's', 'f'], 'network_dep', None, True)
    self.childs.append(child)
    self.act_depend_of_me.append(reverse_dependency)
791 #Give data for checks's macros
792 def get_data_for_checks(self
):
795 #Give data for event handler's macro
796 def get_data_for_event_handler(self
):
799 #Give data for the notification's macros
def get_data_for_notifications(self, contact, n):
    """Give the data used for a notification's macros.

    Returns a new list holding, in this order: this host, the contact
    and the notification n.
    """
    macro_data = [self, contact, n]
    return macro_data
804 #See if the notification is launchable (time is OK and contact is OK too)
def notification_is_blocked_by_contact(self, n, contact):
    """Tell whether the contact blocks the notification n.

    The contact is asked through want_host_notification(), which is
    given self.last_chk, self.state, n.type and self.criticity. The
    result is the negation of its answer.
    """
    wanted = contact.want_host_notification(self.last_chk, self.state,
                                            n.type, self.criticity)
    return not wanted
def get_duration_sec(self):
    """Return self.duration_sec converted to an int, as a string."""
    whole_seconds = int(self.duration_sec)
    return str(whole_seconds)
814 def get_duration(self
):
815 m
, s
= divmod(self
.duration_sec
, 60)
817 return "%02dh %02dm %02ds" % (h
, m
, s
)
820 #Check if a notification for this host is suppressed at this time
821 #This is a check at the host level. Do not look at contacts here
822 def notification_is_blocked_by_item(self
, type, t_wished
= None):
824 t_wished
= time
.time()
827 # forced notification -> false
828 # custom notification -> false
830 # Block if notifications are program-wide disabled
831 if not self
.enable_notifications
:
834 # Does the notification period allow sending out this notification?
835 if not self
.notification_period
.is_time_valid(t_wished
):
838 # Block if notifications are disabled for this host
839 if not self
.notifications_enabled
:
842 # Block if the current status is in the notification_options d,u,r,f,s
843 if 'n' in self
.notification_options
:
846 if type in ('PROBLEM', 'RECOVERY'):
847 if self
.state
== 'DOWN' and not 'd' in self
.notification_options
:
849 if self
.state
== 'UP' and not 'r' in self
.notification_options
:
851 if self
.state
== 'UNREACHABLE' and not 'u' in self
.notification_options
:
853 if (type in ('FLAPPINGSTART', 'FLAPPINGSTOP', 'FLAPPINGDISABLED')
854 and not 'f' in self
.notification_options
):
856 if (type in ('DOWNTIMESTART', 'DOWNTIMEEND', 'DOWNTIMECANCELLED')
857 and not 's' in self
.notification_options
):
860 # Acknowledgements make no sense when the status is ok/up
861 if type == 'ACKNOWLEDGEMENT':
862 if self
.state
== self
.ok_up
:
866 if type in ('FLAPPINGSTART', 'FLAPPINGSTOP', 'FLAPPINGDISABLED'):
867 # todo block if not notify_on_flapping
868 if self
.scheduled_downtime_depth
> 0:
871 # When in deep downtime, only allow end-of-downtime notifications
872 # In depth 1 the downtime just started and can be notified
873 if self
.scheduled_downtime_depth
> 1 and not type in ('DOWNTIMEEND', 'DOWNTIMECANCELLED'):
876 # Block if in a scheduled downtime and a problem arises
877 if self
.scheduled_downtime_depth
> 0 and type in ('PROBLEM', 'RECOVERY'):
880 # Block if the status is SOFT
881 if self
.state_type
== 'SOFT' and type == 'PROBLEM':
884 # Block if the problem has already been acknowledged
885 if self
.problem_has_been_acknowledged
and type != 'ACKNOWLEDGEMENT':
895 #Get a oc*p command if item has obsess_over_*
896 #command. It must be enabled locally and globally
897 def get_obsessive_compulsive_processor_command(self
):
899 if not cls
.obsess_over
or not self
.obsess_over_host
:
903 data
= self
.get_data_for_event_handler()
904 cmd
= m
.resolve_command(cls
.ochp_command
, data
)
905 e
= EventHandler(cmd
, timeout
=cls
.ochp_timeout
)
907 #ok we can put it in our temp action queue
908 self
.actions
.append(e
)
914 name_property
= "host_name" #use for the search by name
915 inner_class
= Host
#use for know what is in items
918 #prepare_for_conf_sending to flatten some properties
919 def prepare_for_sending(self
):
921 h
.prepare_for_conf_sending()
924 #Create link between elements:
925 #hosts -> timeperiods
926 #hosts -> hosts (parents, etc)
927 #hosts -> commands (check_command)
def linkify(self, timeperiods=None, commands=None, contacts=None, realms=None, resultmodulations=None, escalations=None, hostgroups=None):
    """Create the links between the hosts and the other elements.

    The linkify_* helpers are called in a fixed order: timeperiods
    (notification, check and maintenance periods), parent hosts,
    hostgroups, commands (check_command and event_handler), contacts,
    realms, result modulations and finally escalations.
    """
    for period_property in ('notification_period', 'check_period', 'maintenance_period'):
        self.linkify_with_timeperiods(timeperiods, period_property)

    self.linkify_h_by_h()
    self.linkify_h_by_hg(hostgroups)

    for command_property in ('check_command', 'event_handler'):
        self.linkify_one_command_with_commands(commands, command_property)

    self.linkify_with_contacts(contacts)
    self.linkify_h_by_realms(realms)
    self.linkify_with_resultmodulations(resultmodulations)

    # WARNING: not all escalations are linked here
    # (just the escalations, not serviceesca or hostesca).
    # Those last ones will be linked in the escalations linkify.
    self.linkify_with_escalations(escalations)
947 #Fill address by host_name if not set
948 def fill_predictive_missing_parameters(self
):
950 h
.fill_predictive_missing_parameters()
953 #Link host with hosts (parents)
954 def linkify_h_by_h(self
):
959 for parent
in parents
:
960 parent
= parent
.strip()
961 p
= self
.find_by_name(parent
)
963 new_parents
.append(p
)
965 err
= "Error : the parent '%s' on host '%s' is unknown!" % (parent
, h
.get_name())
966 self
.configuration_errors
.append(err
)
967 #print "Me,", h.host_name, "define my parents", new_parents
968 #We find the id, we replace the names
969 h
.parents
= new_parents
972 #Link with realms and set a default realm if none
973 def linkify_h_by_realms(self
, realms
):
976 if getattr(r
, 'default', False):
978 if default_realm
is None:
979 print "Error : there is no default realm defined!"
981 #print h.get_name(), h.realm
982 if h
.realm
is not None:
983 p
= realms
.find_by_name(h
.realm
.strip())
986 print "Host", h
.get_name(), "is in the realm", p
.get_name()
988 err
= "Error : the host %s got a invalid realm (%s)!" % (h
.get_name(), h
.realm
)
989 h
.configuration_errors
.append(err
)
992 #print "Notice : applying default realm %s to host %s" % (default_realm.get_name(), h.get_name())
993 h
.realm
= default_realm
994 h
.got_default_realm
= True
997 #We look for hostgroups property in hosts and
999 def linkify_h_by_hg(self
, hostgroups
):
1000 #Hostgroups property need to be fullfill for got the informations
1001 #self.apply_partial_inheritance('hostgroups')
1002 #self.apply_partial_inheritance('contact_groups')
1004 #Register host in the hostgroups
1008 if hasattr(h
, 'hostgroups') and h
.hostgroups
!= '':
1009 hgs
= h
.hostgroups
.split(',')
1011 hg_name
= hg_name
.strip()
1012 hg
= hostgroups
.find_by_name(hg_name
)
1014 new_hostgroups
.append(hg
)
1016 err
= "Error : the hostgroup '%s' of the host '%s' is unknown" % (hg_name
, h
.host_name
)
1017 h
.configuration_errors
.append(err
)
1018 h
.hostgroups
= new_hostgroups
1022 #It's used to change old Nagios2 names to
1024 def old_properties_names_to_new(self
):
1026 h
.old_properties_names_to_new()
1030 #We look for hostgroups property in hosts and
1031 def explode(self
, hostgroups
, contactgroups
):
1032 #Hostgroups property need to be fullfill for got the informations
1033 #self.apply_partial_inheritance('hostgroups')
1034 #self.apply_partial_inheritance('contact_groups')
1036 #Register host in the hostgroups
1038 if not h
.is_tpl() and hasattr(h
, 'host_name'):
1040 if hasattr(h
, 'hostgroups'):
1041 hgs
= h
.hostgroups
.split(',')
1043 hostgroups
.add_member(hname
, hg
.strip())
1045 #items::explode_contact_groups_into_contacts
1046 #take all contacts from our contact_groups into our contact property
1047 self
.explode_contact_groups_into_contacts(contactgroups
)
1051 #Create dependencies:
1052 #Dependencies at the host level: host parent
1053 def apply_dependancies(self
):
1055 h
.fill_parents_dependancie()
1058 #Parent graph: use to find quickly relations between all host, and loop
1059 #return True if tehre is a loop
1060 def no_loop_in_parents(self
):
1061 #Ok, we say "from now, no loop :) "
1064 #Create parent graph
1067 #With all hosts as nodes
1076 parents
.add_edge(p
, h
)
1078 #Now get the list of all hosts in a loop
1079 host_in_loops
= parents
.loop_check()
1081 #and raise errors about it
1082 for h
in host_in_loops
:
1083 logger
.log("Error: The host '%s' is part of a circular parent/child chain!" % h
.get_name())
1089 #Return a list of the host_name of the hosts
1090 #that got the template with name=tpl_name
1091 def find_hosts_that_use_template(self
, tpl_name
):
1093 #first find the template
1096 #Look for the template with the right name
1097 if h
.is_tpl() and hasattr(h
, 'name') and h
.name
== tpl_name
:
1100 #If we find noone, we return nothing (easy case:) )
1104 #Ok, we find the tpl
1106 if tpl
in h
.templates
and hasattr(h
, 'host_name'):
1107 res
.append(h
.host_name
)
1112 # Will create all business tree for the
1114 def create_business_rules(self
, hosts
, services
):
1116 h
.create_business_rules(hosts
, services
)
1119 # Will link all business service/host with theirs
1120 # dep for problem/impact link
1121 def create_business_rules_dependencies(self
):
1123 h
.create_business_rules_dependencies()