Fix: Nagios allows elements without contacts. Do likewise.
[shinken.git] / shinken / objects / host.py
blobdb861b1ec1ae9680da9cd60e66611710f19c9602
1 #!/usr/bin/env python
2 #Copyright (C) 2009-2010 :
3 # Gabes Jean, naparuba@gmail.com
4 # Gerhard Lausser, Gerhard.Lausser@consol.de
5 # Gregory Starck, g.starck@gmail.com
6 # Hartmut Goebel, h.goebel@goebel-consult.de
8 #This file is part of Shinken.
10 #Shinken is free software: you can redistribute it and/or modify
11 #it under the terms of the GNU Affero General Public License as published by
12 #the Free Software Foundation, either version 3 of the License, or
13 #(at your option) any later version.
15 #Shinken is distributed in the hope that it will be useful,
16 #but WITHOUT ANY WARRANTY; without even the implied warranty of
17 #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 #GNU Affero General Public License for more details.
20 #You should have received a copy of the GNU Affero General Public License
21 #along with Shinken. If not, see <http://www.gnu.org/licenses/>.
23 """ This is the main class for the Host. In fact it's mainly
24 about the configuration part. for the running one, it's better
25 to look at the schedulingitem class that manage all
26 scheduling/consome check smart things :)
27 """
29 import time
31 from shinken.objects import Items, SchedulingItem
32 from shinken.autoslots import AutoSlots
33 from shinken.util import format_t_into_dhms_format, to_hostnames_list, get_obj_name, to_svc_hst_distinct_lists, to_list_string_of_names
34 from shinken.property import BoolProp, IntegerProp, FloatProp, CharProp, StringProp, ListProp
35 from shinken.graph import Graph
36 from shinken.macroresolver import MacroResolver
37 from shinken.eventhandler import EventHandler
38 from shinken.log import logger
41 class Host(SchedulingItem):
42 #AutoSlots create the __slots__ with properties and
43 #running_properties names
44 __metaclass__ = AutoSlots
46 id = 1 #0 is reserved for host (primary node for parents)
47 ok_up = 'UP'
48 my_type = 'host'
51 # properties defined by configuration
52 # *required : is required in conf
53 # *default : default value if no set in conf
54 # *pythonize : function to call when transfort string to python object
55 # *fill_brok : if set, send to broker. there are two categories: full_status for initial and update status, check_result for check results
56 # *no_slots : do not take this property for __slots__
57 # Only for the inital call
58 # conf_send_preparation : if set, will pass the property to this function. It's used to "flatten"
59 # some dangerous properties like realms that are too 'linked' to be send like that.
60 # brok_transformation : if set, will call the function with the value of the property
61 # the major times it will be to flatten the data (like realm_name instead of the realm object).
62 properties = {
63 'host_name': StringProp(fill_brok=['full_status', 'check_result', 'next_schedule']),
64 'alias': StringProp(fill_brok=['full_status']),
65 'display_name': StringProp(default='none', fill_brok=['full_status']),
66 'address': StringProp(fill_brok=['full_status']),
67 'parents': ListProp(brok_transformation=to_hostnames_list, default='', fill_brok=['full_status']),
68 'hostgroups': StringProp(brok_transformation=to_list_string_of_names, default='', fill_brok=['full_status']),
69 'check_command': StringProp(default='_internal_host_up', fill_brok=['full_status']),
70 'initial_state': CharProp(default='u', fill_brok=['full_status']),
71 'max_check_attempts': IntegerProp(fill_brok=['full_status']),
72 'check_interval': IntegerProp(default='0', fill_brok=['full_status']),
73 'retry_interval': IntegerProp(default='0', fill_brok=['full_status']),
74 'active_checks_enabled': BoolProp(default='1', fill_brok=['full_status']),
75 'passive_checks_enabled': BoolProp(default='1', fill_brok=['full_status']),
76 'check_period': StringProp(fill_brok=['full_status']),
77 'obsess_over_host': BoolProp(default='0', fill_brok=['full_status']),
78 'check_freshness': BoolProp(default='0', fill_brok=['full_status']),
79 'freshness_threshold': IntegerProp(default='0', fill_brok=['full_status']),
80 'event_handler': StringProp(default='', fill_brok=['full_status']),
81 'event_handler_enabled': BoolProp(default='0', fill_brok=['full_status']),
82 'low_flap_threshold': IntegerProp(default='25', fill_brok=['full_status']),
83 'high_flap_threshold': IntegerProp(default='50', fill_brok=['full_status']),
84 'flap_detection_enabled': BoolProp(default='1', fill_brok=['full_status']),
85 'flap_detection_options': ListProp(default='o,d,u', fill_brok=['full_status']),
86 'process_perf_data': BoolProp(default='1', fill_brok=['full_status']),
87 'retain_status_information': BoolProp(default='1', fill_brok=['full_status']),
88 'retain_nonstatus_information': BoolProp(default='1', fill_brok=['full_status']),
89 'contacts': StringProp(default='', fill_brok=['full_status']),
90 'contact_groups': StringProp(default='', fill_brok=['full_status']),
91 'notification_interval': IntegerProp(default='60', fill_brok=['full_status']),
92 'first_notification_delay': IntegerProp(default='0', fill_brok=['full_status']),
93 'notification_period': StringProp(fill_brok=['full_status']),
94 'notification_options': ListProp(default='d,u,r,f', fill_brok=['full_status']),
95 'notifications_enabled': BoolProp(default='1', fill_brok=['full_status']),
96 'stalking_options': ListProp(default='', fill_brok=['full_status']),
97 'notes': StringProp(default='', fill_brok=['full_status']),
98 'notes_url': StringProp(default='', fill_brok=['full_status']),
99 'action_url': StringProp(default='', fill_brok=['full_status']),
100 'icon_image': StringProp(default='', fill_brok=['full_status']),
101 'icon_image_alt': StringProp(default='', fill_brok=['full_status']),
102 'vrml_image': StringProp(default='', fill_brok=['full_status']),
103 'statusmap_image': StringProp(default='', fill_brok=['full_status']),
105 # No slots for this 2 because begin property by a number seems bad
106 # it's stupid!
107 '2d_coords': StringProp(default='', fill_brok=['full_status'], no_slots=True),
108 '3d_coords': StringProp(default='', fill_brok=['full_status'], no_slots=True),
109 'failure_prediction_enabled': BoolProp(default='0', fill_brok=['full_status']),
111 ### New to shinken
112 # 'fill_brok' is ok because in scheduler it's already
113 # a string from conf_send_preparation
114 'realm': StringProp(default=None, fill_brok=['full_status'], conf_send_preparation=get_obj_name),
115 'poller_tag': StringProp(default='None'),
116 'reactionner_tag': StringProp(default='None'),
117 'resultmodulations': StringProp(default=''),
118 'escalations': StringProp(default='', fill_brok=['full_status']),
119 'maintenance_period': StringProp(default='', fill_brok=['full_status']),
121 # Criticity value
122 'criticity': IntegerProp(default='3', fill_brok=['full_status']),
125 # properties set only for running purpose
126 # retention : save/load this property from retention
127 running_properties = {
128 'last_chk': IntegerProp(default=0, fill_brok=['full_status', 'check_result'], retention=True),
129 'next_chk': IntegerProp(default=0, fill_brok=['full_status', 'next_schedule']),
130 'in_checking': BoolProp(default=False, fill_brok=['full_status', 'check_result', 'next_schedule']),
131 'latency': FloatProp(default=0, fill_brok=['full_status', 'check_result'], retention=True),
132 'attempt': IntegerProp(default=0, fill_brok=['full_status', 'check_result'], retention=True),
133 'state': StringProp(default='PENDING', fill_brok=['full_status'], retention=True),
134 'state_id': IntegerProp(default=0, fill_brok=['full_status', 'check_result'], retention=True),
135 'state_type': StringProp(default='HARD', fill_brok=['full_status'], retention=True),
136 'state_type_id': IntegerProp(default=0, fill_brok=['full_status', 'check_result'], retention=True),
137 'current_event_id': StringProp(default=0, fill_brok=['full_status', 'check_result'], retention=True),
138 'last_event_id': IntegerProp(default=0, fill_brok=['full_status', 'check_result'], retention=True),
139 'last_state': StringProp(default='PENDING', fill_brok=['full_status'], retention=True),
140 'last_state_id': IntegerProp(default=0, fill_brok=['full_status'], retention=True),
141 'last_state_type' : StringProp(default='HARD', fill_brok=['full_status'], retention=True),
142 'last_state_change': FloatProp(default=time.time(), fill_brok=['full_status'], retention=True),
143 'last_hard_state_change': FloatProp(default=time.time(), fill_brok=['full_status', 'check_result'], retention=True),
144 'last_hard_state': StringProp(default='PENDING', fill_brok=['full_status'], retention=True),
145 'last_hard_state_id' : IntegerProp(default=0, fill_brok=['full_status'], retention=True),
146 'last_time_up': IntegerProp(default=int(time.time()), fill_brok=['full_status', 'check_result'], retention=True),
147 'last_time_down': IntegerProp(default=int(time.time()), fill_brok=['full_status', 'check_result'], retention=True),
148 'last_time_unreachable': IntegerProp(default=int(time.time()), fill_brok=['full_status', 'check_result'], retention=True),
149 'duration_sec': IntegerProp(default=0, fill_brok=['full_status'], retention=True),
150 'output': StringProp(default='', fill_brok=['full_status', 'check_result'], retention=True),
151 'long_output': StringProp(default='', fill_brok=['full_status', 'check_result'], retention=True),
152 'is_flapping': BoolProp(default=False, fill_brok=['full_status'], retention=True),
153 'flapping_comment_id': IntegerProp(default=0, fill_brok=['full_status'], retention=True),
154 # No broks for _depend_of because of to much links to hosts/services
155 # dependencies for actions like notif of event handler, so AFTER check return
156 'act_depend_of': StringProp(default=[]),
158 # dependencies for checks raise, so BEFORE checks
159 'chk_depend_of': StringProp(default=[]),
161 # elements that depend of me, so the reverse than just uppper
162 'act_depend_of_me': StringProp(default=[]),
164 # elements that depend of me
165 'chk_depend_of_me': StringProp(default=[]),
166 'last_state_update': StringProp(default=time.time(), fill_brok=['full_status'], retention=True),
168 # no brok ,to much links
169 'services': StringProp(default=[]),
171 # No broks, it's just internal, and checks have too links
172 'checks_in_progress': StringProp(default=[]),
174 # No broks, it's just internal, and checks have too links
175 'notifications_in_progress': StringProp(default={}, retention=True),
176 'downtimes': StringProp(default=[], fill_brok=['full_status'], retention=True),
177 'comments': StringProp(default=[], fill_brok=['full_status'], retention=True),
178 'flapping_changes': StringProp(default=[], fill_brok=['full_status'], retention=True),
179 'percent_state_change': FloatProp(default=0.0, fill_brok=['full_status'], retention=True),
180 'problem_has_been_acknowledged': BoolProp(default=False, fill_brok=['full_status'], retention=True),
181 'acknowledgement': StringProp(default=None, retention=True),
182 'acknowledgement_type': IntegerProp(default=1, fill_brok=['full_status', 'check_result'], retention=True),
183 'check_type': IntegerProp(default=0, fill_brok=['full_status', 'check_result'], retention=True),
184 'has_been_checked': IntegerProp(default=0, fill_brok=['full_status', 'check_result'], retention=True),
185 'should_be_scheduled': IntegerProp(default=1, fill_brok=['full_status'], retention=True),
186 'last_problem_id': IntegerProp(default=0, fill_brok=['full_status', 'check_result'], retention=True),
187 'current_problem_id': IntegerProp(default=0, fill_brok=['full_status', 'check_result'], retention=True),
188 'execution_time': FloatProp(default=0.0, fill_brok=['full_status', 'check_result'], retention=True),
189 'last_notification': FloatProp(default=time.time(), fill_brok=['full_status'], retention=True),
190 'current_notification_number': IntegerProp(default=0, fill_brok=['full_status'], retention=True),
191 'current_notification_id': IntegerProp(default=0, fill_brok=['full_status'], retention=True),
192 'check_flapping_recovery_notification': BoolProp(default=True, fill_brok=['full_status'], retention=True),
193 'scheduled_downtime_depth': IntegerProp(default=0, fill_brok=['full_status'], retention=True),
194 'pending_flex_downtime': IntegerProp(default=0, fill_brok=['full_status'], retention=True),
195 'timeout': IntegerProp(default=0, fill_brok=['full_status', 'check_result'], retention=True),
196 'start_time': IntegerProp(default=0, fill_brok=['full_status', 'check_result'], retention=True),
197 'end_time': IntegerProp(default=0, fill_brok=['full_status', 'check_result'], retention=True),
198 'early_timeout': IntegerProp(default=0, fill_brok=['full_status', 'check_result'], retention=True),
199 'return_code': IntegerProp(default=0, fill_brok=['full_status', 'check_result'], retention=True),
200 'perf_data': StringProp(default='', fill_brok=['full_status', 'check_result'], retention=True),
201 'last_perf_data': StringProp(default='', retention=True),
202 'customs': StringProp(default={}, fill_brok=['full_status']),
203 'got_default_realm' : BoolProp(default=False),
205 # use for having all contacts we have notified
206 'notified_contacts': StringProp(default=set()),
208 'in_scheduled_downtime': BoolProp(default=False, retention=True),
209 'in_scheduled_downtime_during_last_check': BoolProp(default=False, retention=True),
211 # put here checks and notif raised
212 'actions': StringProp(default=[]),
213 # and here broks raised
214 'broks': StringProp(default=[]),
216 # For knowing with which elements we are in relation
217 # of dep.
218 # childs are the hosts that have US as parent, so
219 # only a network dep
220 'childs': StringProp(brok_transformation=to_hostnames_list, default=[], fill_brok=['full_status']),
221 # Here it's the elements we are depending on
222 # so our parents as network relation, or a host
223 # we are depending in a hostdependency
224 # or even if we are businesss based.
225 'parent_dependencies' : StringProp(brok_transformation=to_svc_hst_distinct_lists, default=[], fill_brok=['full_status']),
226 # Here it's the guys taht depend on us. So it's the total
227 # oposite of the parent_dependencies
228 'child_dependencies': StringProp(
229 brok_transformation=to_svc_hst_distinct_lists,
230 default=[],
231 fill_brok=['full_status']),
233 # All errors and warning raised during the configuration parsing
234 # and taht will raised real warning/errors during the is_correct
235 'configuration_warnings': StringProp(default=[]),
236 'configuration_errors': StringProp(default=[]),
238 ### Problem/impact part
239 'is_problem': StringProp(default=False, fill_brok=['full_status']),
240 'is_impact': StringProp(default=False, fill_brok=['full_status']),
241 # the save value of our criticity for "problems"
242 'my_own_criticity': IntegerProp(default=-1),
244 # list of problems that make us an impact
245 'source_problems': StringProp(brok_transformation=to_svc_hst_distinct_lists, default=[], fill_brok=['full_status']),
247 # list of the impact I'm the cause of
248 'impacts': StringProp(brok_transformation=to_svc_hst_distinct_lists, default=[], fill_brok=['full_status']),
250 # keep a trace of the old state before being an impact
251 'state_before_impact': StringProp(default='PENDING'),
252 # keep a trace of the old state id before being an impact
253 'state_id_before_impact': StringProp(default=0),
254 # if the state change, we know so we do not revert it
255 'state_changed_since_impact': StringProp(default=False),
257 #BUSINESS CORRELATOR PART
258 # Say if we are business based rule or not
259 'got_business_rule' : BoolProp(default=False, fill_brok=['full_status']),
260 # Our Dependency node for the business rule
261 'business_rule' : StringProp(default=None),
263 # Manage the unkown/unreach during hard state
264 # From now its not really used
265 'in_hard_unknown_reach_phase' : BoolProp(default=False, retention=True),
266 'was_in_hard_unknown_reach_phase' : BoolProp(default=False, retention=True),
267 'state_before_hard_unknown_reach_phase' : StringProp(default='UP', retention=True),
270 # Hosts macros and prop that give the information
271 # the prop can be callable or not
272 macros = {
273 'HOSTNAME': 'host_name',
274 'HOSTDISPLAYNAME': 'display_name',
275 'HOSTALIAS': 'alias',
276 'HOSTADDRESS': 'address',
277 'HOSTSTATE': 'state',
278 'HOSTSTATEID': 'state_id',
279 'LASTHOSTSTATE': 'last_state',
280 'LASTHOSTSTATEID': 'last_state_id',
281 'HOSTSTATETYPE': 'state_type',
282 'HOSTATTEMPT': 'attempt',
283 'MAXHOSTATTEMPTS': 'max_check_attempts',
284 'HOSTEVENTID': 'current_event_id',
285 'LASTHOSTEVENTID': 'last_event_id',
286 'HOSTPROBLEMID': 'current_problem_id',
287 'LASTHOSTPROBLEMID': 'last_problem_id',
288 'HOSTLATENCY': 'latency',
289 'HOSTEXECUTIONTIME': 'execution_time',
290 'HOSTDURATION': 'get_duration',
291 'HOSTDURATIONSEC': 'get_duration_sec',
292 'HOSTDOWNTIME': 'get_downtime',
293 'HOSTPERCENTCHANGE': 'percent_state_change',
294 'HOSTGROUPNAME': 'get_groupname',
295 'HOSTGROUPNAMES': 'get_groupnames',
296 'LASTHOSTCHECK': 'last_chk',
297 'LASTHOSTSTATECHANGE': 'last_state_change',
298 'LASTHOSTUP': 'last_time_up',
299 'LASTHOSTDOWN': 'last_time_down',
300 'LASTHOSTUNREACHABLE': 'last_time_unreachable',
301 'HOSTOUTPUT': 'output',
302 'LONGHOSTOUTPUT': 'long_output',
303 'HOSTPERFDATA': 'perf_data',
304 'LASTHOSTPERFDATA': 'last_perf_data',
305 'HOSTCHECKCOMMAND': 'get_check_command',
306 'HOSTACKAUTHOR': 'get_ack_author_name',
307 'HOSTACKAUTHORNAME': 'get_ack_author_name',
308 'HOSTACKAUTHORALIAS': 'get_ack_author_name',
309 'HOSTACKCOMMENT': 'get_ack_comment',
310 'HOSTACTIONURL': 'action_url',
311 'HOSTNOTESURL': 'notes_url',
312 'HOSTNOTES': 'notes',
313 'TOTALHOSTSERVICES': 'get_total_services',
314 'TOTALHOSTSERVICESOK': 'get_total_services_ok',
315 'TOTALHOSTSERVICESWARNING': 'get_total_services_warning',
316 'TOTALHOSTSERVICESUNKNOWN': 'get_total_services_unknown',
317 'TOTALHOSTSERVICESCRITICAL': 'get_total_services_critical'
321 # This tab is used to transform old parameters name into new ones
322 # so from Nagios2 format, to Nagios3 ones
323 old_properties = {
324 'normal_check_interval': 'check_interval',
325 'retry_check_interval': 'retry_interval'
def clean(self):
    """No-op: this method performs no work and returns None."""
    return None
333 # Called by pickle to turn the host into plain data.
334 # We build a dict because lists are too fragile for
335 # retention saving and co :( even if it is more
336 # expensive.
337 # The __setstate__ function does the inverse.
def __getstate__(self):
    """Build the pickle state of this host as a plain dict.

    The dict always holds 'id', plus every name listed in the class-level
    ``properties`` and ``running_properties`` tables that is actually set
    on this instance. ``__setstate__`` performs the inverse operation.
    """
    klass = self.__class__
    # id is not declared in the *_properties tables, so seed it by hand
    state = {'id': self.id}
    for table in (klass.properties, klass.running_properties):
        for name in table:
            if hasattr(self, name):
                state[name] = getattr(self, name)
    return state
351 # Inversed funtion of getstate
def __setstate__(self, state):
    """Restore this host from a dict produced by ``__getstate__``.

    'id' is mandatory in ``state``. Every other name is restored only when
    it is both declared in the class-level ``properties`` or
    ``running_properties`` tables and present in ``state``.
    """
    klass = self.__class__
    self.id = state['id']
    for table in (klass.properties, klass.running_properties):
        for name in table:
            if name in state:
                setattr(self, name, state[name])
364 # Fill adresse with host_name if not already set
def fill_predictive_missing_parameters(self):
    """Default ``address`` and ``alias`` to ``host_name`` when they are unset.

    Nothing is changed when the host has no ``host_name`` attribute.
    """
    if not hasattr(self, 'host_name'):
        return
    for attr in ('address', 'alias'):
        if not hasattr(self, attr):
            setattr(self, attr, self.host_name)
373 # Check is required prop are set:
374 # contacts OR contactgroups is need
def is_correct(self):
    """Check that the configuration of this host is complete and valid.

    Every problem found is reported through ``logger.log``. Missing
    contacts only produce a warning and do not invalidate the host.
    As a side effect, ``check_period`` is set to None when the attribute
    is not defined at all.

    Returns True when no blocking error was found, False otherwise.
    """
    state = True  # guilty or not? :)
    cls = self.__class__

    # Properties listed here are skipped by the generic "required property
    # is missing" test; check_period and notification_interval get their
    # own checks further down.
    special_properties = ('check_period', 'notification_interval')
    for prop, entry in cls.properties.items():
        if prop in special_properties:
            continue
        if not hasattr(self, prop) and entry.required:
            logger.log("%s : I do not have %s" % (self.get_name(), prop))
            state = False  # Bad boy...

    # Raise all the errors seen earlier, like unknown contacts and co
    if self.configuration_errors != []:
        state = False
        for err in self.configuration_errors:
            logger.log(err)

    # Ok now we manage special cases...
    # A host without contacts is accepted: warn, but do not fail.
    if self.notifications_enabled and self.contacts == []:
        logger.log("Warning : the host %s do not have contacts nor contact_groups" % self.get_name())

    if getattr(self, 'check_command', None) is None:
        logger.log("%s : I've got no check_command" % self.get_name())
        state = False
    # Ok got a command, but maybe it's invalid
    else:
        if not self.check_command.is_valid():
            logger.log("%s : my check_command %s is invalid" % (self.get_name(), self.check_command.command))
            state = False
        if self.got_business_rule:
            if not self.business_rule.is_valid():
                logger.log("%s : my business rule is invalid" % (self.get_name(),))
                for bperror in self.business_rule.configuration_errors:
                    logger.log("%s : %s" % (self.get_name(), bperror))
                state = False

    if not hasattr(self, 'notification_interval') and self.notifications_enabled == True:
        logger.log("%s : I've got no notification_interval but I've got notifications enabled" % self.get_name())
        state = False

    # If active check is enabled with a check_interval!=0, we must have a check_period
    if (getattr(self, 'active_checks_enabled', False)
            and getattr(self, 'check_period', None) is None
            and getattr(self, 'check_interval', 1) != 0):
        logger.log("%s : My check_period is not correct" % self.get_name())
        state = False

    if getattr(self, 'realm', None) is None:
        logger.log("%s : My realm is not correct" % self.get_name())
        state = False

    # Make sure the attribute exists from now on, even if empty
    if not hasattr(self, 'check_period'):
        self.check_period = None

    if hasattr(self, 'host_name'):
        for c in cls.illegal_object_name_chars:
            if c in self.host_name:
                logger.log("%s : My host_name got the caracter %s that is not allowed." % (self.get_name(), c))
                state = False
    return state
435 # Search in my service if I've got the service
def find_service_by_name(self, service_description):
    """Return the first of our services with this description, or None."""
    for candidate in self.services:
        if candidate.service_description != service_description:
            continue
        return candidate
    return None
443 # Macro part
def get_total_services(self):
    """Macro helper: number of services linked to this host, as a string."""
    total = len(self.services)
    return str(total)
def get_total_services_ok(self):
    """Macro helper: count of our services with state_id 0, as a string."""
    total = sum(1 for srv in self.services if srv.state_id == 0)
    return str(total)
def get_total_services_warning(self):
    """Macro helper: count of our services with state_id 1, as a string."""
    total = sum(1 for srv in self.services if srv.state_id == 1)
    return str(total)
def get_total_services_critical(self):
    """Macro helper: count of our services with state_id 2, as a string."""
    total = sum(1 for srv in self.services if srv.state_id == 2)
    return str(total)
def get_total_services_unknown(self):
    """Macro helper: count of our services with state_id 3, as a string."""
    total = sum(1 for srv in self.services if srv.state_id == 3)
    return str(total)
def get_ack_author_name(self):
    """Macro helper: author of the current acknowledgement, or '' if none."""
    ack = self.acknowledgement
    if ack is not None:
        return ack.author
    return ''
def get_ack_comment(self):
    """Macro helper: comment of the current acknowledgement, or '' if none."""
    ack = self.acknowledgement
    if ack is not None:
        return ack.comment
    return ''
def get_check_command(self):
    """Macro helper: return whatever ``check_command.get_name()`` returns."""
    command = self.check_command
    return command.get_name()
480 # For get a nice name
def get_name(self):
    """Return a printable name for this host or host template.

    A real host gives its ``host_name`` and a template gives its ``name``.
    When the attribute is missing, the placeholder 'UNNAMEDHOST' or
    'UNNAMEDHOSTTEMPLATE' is returned instead.
    """
    if self.is_tpl():
        # outch, maybe no name for this template
        return getattr(self, 'name', 'UNNAMEDHOSTTEMPLATE')
    # outch, maybe no hostname
    return getattr(self, 'host_name', 'UNNAMEDHOST')
494 # For debugin purpose only
def get_dbg_name(self):
    """Return ``host_name`` as is (no fallback when it is missing)."""
    dbg_name = self.host_name
    return dbg_name
499 # Say if we got the other in one of your dep list
def is_linked_with_host(self, other):
    """Tell whether ``other`` is the host of one of our ``act_depend_of`` entries.

    Each entry is a tuple whose first element is the host we depend on.
    """
    linked_hosts = [dep[0] for dep in self.act_depend_of]
    return other in linked_hosts
507 # Delete all links in the act_depend_of list of self and other
def del_host_act_dependancy(self, other):
    """Drop every action dependency link between this host and ``other``.

    Entries pointing at ``other`` are removed from our ``act_depend_of``
    list, and entries pointing at us are removed from
    ``other.act_depend_of_me``. Both lists are modified in place.
    """
    # First we clean our own list. The matches are collected beforehand so
    # that the list is never modified while it is being walked.
    mine = [dep for dep in self.act_depend_of if dep[0] == other]
    for dep in mine:
        self.act_depend_of.remove(dep)

    # And now the father side
    theirs = [dep for dep in other.act_depend_of_me if dep[0] == self]
    for dep in theirs:
        other.act_depend_of_me.remove(dep)
526 # Add a dependancy for action event handler, notification, etc)
527 # and add ourself in it's dep list
def add_host_act_dependancy(self, h, status, timeperiod, inherits_parent):
    """Register an action dependency of this host on host ``h``.

    A 'logic_dep' entry is appended to our ``act_depend_of`` list and the
    mirror entry to ``h.act_depend_of_me``. ``h`` is then asked to register
    us through ``register_son_in_parent_child_dependencies``.
    """
    kind = 'logic_dep'
    # I add him in MY list
    forward = (h, status, kind, timeperiod, inherits_parent)
    self.act_depend_of.append(forward)
    # And I add me in its list
    backward = (self, status, kind, timeperiod, inherits_parent)
    h.act_depend_of_me.append(backward)

    # And the parent/child dep lists too
    h.register_son_in_parent_child_dependencies(self)
538 # Register the dependancy between 2 service for action (notification etc)
539 # but based on a BUSINESS rule, so on fact:
540 # ERP depend on database, so we fill just database.act_depend_of_me
541 # because we will want ERP mails to go on! So call this
542 # on the database service with the srv=ERP service
def add_business_rule_act_dependancy(self, h, status, timeperiod, inherits_parent):
    """Register ``h`` as a business-rule dependent of this host.

    Only our ``act_depend_of_me`` list receives a 'business_dep' entry;
    nothing is added to ``h.act_depend_of``. We then register ``h``
    through our own ``register_son_in_parent_child_dependencies``.
    """
    # I only register so that he knows I WILL be an impact
    entry = (h, status, 'business_dep', timeperiod, inherits_parent)
    self.act_depend_of_me.append(entry)

    # And the parent/child dep lists too
    self.register_son_in_parent_child_dependencies(h)
555 # Add a dependancy for check (so before launch)
def add_host_chk_dependancy(self, h, status, timeperiod, inherits_parent):
    """Register a check dependency of this host on host ``h``.

    A 'logic_dep' entry is appended to our ``chk_depend_of`` list and the
    mirror entry to ``h.chk_depend_of_me``. ``h`` is then asked to register
    us through ``register_son_in_parent_child_dependencies``.
    """
    kind = 'logic_dep'
    # I add him in MY list
    forward = (h, status, kind, timeperiod, inherits_parent)
    self.chk_depend_of.append(forward)
    # And I add me in its list
    backward = (self, status, kind, timeperiod, inherits_parent)
    h.chk_depend_of_me.append(backward)

    # And we fill parent/childs dep for brok purpose
    # Here self depends on h
    h.register_son_in_parent_child_dependencies(self)
567 # Add one of our service to services (at linkify)
def add_service_link(self, service):
    """Append ``service`` to this host's ``services`` list."""
    linked = self.services
    linked.append(service)
572 # Set unreachable : all our parents are down!
573 # We have a special state, but state was already set, we just need to
574 # update it. We are no DOWN, we are UNREACHABLE and
575 # got a state id is 2
def set_unreachable(self):
    """Flag this host as 'UNREACHABLE' (state_id 2).

    ``last_time_unreachable`` is stamped with the current time truncated
    to an integer.
    """
    self.state_id = 2
    self.state = 'UNREACHABLE'
    self.last_time_unreachable = int(time.time())
583 # We just go an impact, so we go unreachable
584 # But only if we enable this stte change in the conf
def set_impact_state(self):
    """Force the host to 'UNREACHABLE' (state_id 2) because it became an impact.

    Nothing happens unless the class-level flag
    ``enable_problem_impacts_states_change`` is set. The previous state
    and state_id are saved in the ``*_before_impact`` attributes first.
    """
    if not self.__class__.enable_problem_impacts_states_change:
        return
    # Keep a trace of the old state (the problem may come back before
    # a new check)
    self.state_before_impact = self.state
    self.state_id_before_impact = self.state_id
    # This flag tells whether the impact state got overridden since
    self.state_changed_since_impact = False
    self.state = 'UNREACHABLE'  # exit code UNDETERMINED
    self.state_id = 2
598 # Ok, we are no more an impact, if no news checks
599 # overide the impact state, we came back to old
600 # states
601 # And only if impact state change is set in configuration
def unset_impact_state(self):
    """Restore the state and state_id saved when the host became an impact.

    This only happens when the class-level flag
    ``enable_problem_impacts_states_change`` is set and
    ``state_changed_since_impact`` is false.
    """
    if not self.__class__.enable_problem_impacts_states_change:
        return
    if self.state_changed_since_impact:
        return
    self.state = self.state_before_impact
    self.state_id = self.state_id_before_impact
609 # set the state in UP, DOWN, or UNDETERMINED
610 # with the status of a check. Also update last_state
def set_state_from_exit_status(self, status):
    """Update state, state_id and related timestamps from a check exit status.

    Exit status 0 puts the host 'UP' (state_id 0); every other status puts
    it 'DOWN' (state_id 1). ``last_state`` is refreshed first, a flapping
    change is recorded when the state code is listed in
    ``flap_detection_options``, and ``duration_sec`` is recomputed.
    """
    now = time.time()
    self.last_state_update = now

    # last_state must hold the real previous state. While we are an impact
    # whose state was not overridden since, self.state is only the forced
    # impact state and the true one is kept in state_before_impact.
    # This only applies when impact state changes are enabled.
    forced_by_impact = (self.__class__.enable_problem_impacts_states_change
                        and self.is_impact
                        and not self.state_changed_since_impact)
    if forced_by_impact:
        self.last_state = self.state_before_impact
    else:
        self.last_state = self.state

    stamp = int(self.last_state_update)
    if status == 0:
        # NOTE(review): UP is coded 'u' here while is_state() maps 'o' to
        # UP and 'u' to UNREACHABLE -- confirm this is intended.
        self.state, self.state_id, state_code = 'UP', 0, 'u'
        self.last_time_up = stamp
    else:
        # 1, 2, 3 and any undetermined exit code all end up DOWN
        self.state, self.state_id, state_code = 'DOWN', 1, 'd'
        self.last_time_down = stamp

    changed = self.state != self.last_state
    if state_code in self.flap_detection_options:
        self.add_flapping_change(changed)
    if changed:
        self.last_state_change = self.last_state_update
    self.duration_sec = now - self.last_state_change
649 # See if status is status. Can be low of high format (o/UP, d/DOWN, ...)
def is_state(self, status):
    """Tell whether ``status`` designates the current state of the host.

    ``status`` may be the full state name or its short form:
    'o' for 'UP', 'd' for 'DOWN', 'u' for 'UNREACHABLE'.
    """
    if status == self.state:
        return True
    # Now the short forms
    short_forms = (('o', 'UP'), ('d', 'DOWN'), ('u', 'UNREACHABLE'))
    return (status, self.state) in short_forms
663 # The last time when the state was not UP
def last_time_non_ok_or_up(self):
    """Return ``last_time_down`` when it is later than ``last_time_up``, else 0."""
    if self.last_time_down > self.last_time_up:
        return self.last_time_down
    return 0
672 # Add a log entry with a HOST ALERT like:
673 # HOST ALERT: server;DOWN;HARD;1;I don't know what to say...
def raise_alert_log_entry(self):
    """Log a 'HOST ALERT' line: name;state;state_type;attempt;output."""
    fields = (self.get_name(), self.state, self.state_type, self.attempt, self.output)
    logger.log('HOST ALERT: %s;%s;%s;%d;%s' % fields)
678 # Add a log entry with a Freshness alert like:
679 # Warning: The results of host 'Server' are stale by 0d 0h 0m 58s (threshold=0d 1h 0m 0s).
680 # I'm forcing an immediate check of the host.
def raise_freshness_log_entry(self, t_stale_by, t_threshold):
    """Log a warning saying the host results are stale and a check is forced.

    ``t_stale_by`` and ``t_threshold`` are rendered with
    ``format_t_into_dhms_format`` before being written.
    """
    stale_by = format_t_into_dhms_format(t_stale_by)
    threshold = format_t_into_dhms_format(t_threshold)
    message = "Warning: The results of host '%s' are stale by %s (threshold=%s). I'm forcing an immediate check of the host."
    logger.log(message % (self.get_name(), stale_by, threshold))
686 # Raise a log entry with a Notification alert like
687 # HOST NOTIFICATION: superadmin;server;UP;notify-by-rss;no output
def raise_notification_log_entry(self, n):
    """Log a 'HOST NOTIFICATION' line for notification ``n``.

    The line holds contact;host;state;command;output. For the special
    notification types listed below, the state field becomes
    '<type> (<state>)'. Nothing is logged unless the class-level
    ``log_notifications`` flag is set.
    """
    contact = n.contact
    command = n.command_call
    special_types = ('DOWNTIMESTART', 'DOWNTIMEEND', 'CUSTOM', 'ACKNOWLEDGEMENT',
                     'FLAPPINGSTART', 'FLAPPINGSTOP', 'FLAPPINGDISABLED')
    state = self.state
    if n.type in special_types:
        state = '%s (%s)' % (n.type, self.state)
    if not self.__class__.log_notifications:
        return
    fields = (contact.get_name(), self.get_name(), state, command.get_name(), self.output)
    logger.log("HOST NOTIFICATION: %s;%s;%s;%s;%s" % fields)
699 # Raise a log entry with an event handler alert like
700 # HOST EVENT HANDLER: server;UP;HARD;1;restart-service
def raise_event_handler_log_entry(self, command):
    """Log a 'HOST EVENT HANDLER' line: name;state;state_type;attempt;command.

    Nothing is logged unless the class-level ``log_event_handlers`` flag
    is set.
    """
    if not self.__class__.log_event_handlers:
        return
    fields = (self.get_name(), self.state, self.state_type, self.attempt, command.get_name())
    logger.log("HOST EVENT HANDLER: %s;%s;%s;%s;%s" % fields)
707 #Raise a log entry with FLAPPING START alert like
708 #HOST FLAPPING ALERT: server;STARTED; Host appears to have started flapping (50.6% change >= 50.0% threshold)
def raise_flapping_start_log_entry(self, change_ratio, threshold):
    """Log a 'HOST FLAPPING ALERT ... STARTED' line.

    ``change_ratio`` and ``threshold`` are percentages, written with one
    decimal, e.g.:
    HOST FLAPPING ALERT: server;STARTED; Host appears to have started flapping (50.6% change >= 50.0% threshold)
    """
    # A literal percent sign must be doubled in a %-format string. The
    # previous "%.1f% change >= %.1% threshold" made "% c" a char
    # conversion that swallowed the threshold argument.
    logger.log("HOST FLAPPING ALERT: %s;STARTED; Host appears to have started flapping (%.1f%% change >= %.1f%% threshold)" % \
               (self.get_name(), change_ratio, threshold))
714 #Raise a log entry with FLAPPING STOP alert like
715 #HOST FLAPPING ALERT: server;STOPPED; host appears to have stopped flapping (23.0% change < 25.0% threshold)
def raise_flapping_stop_log_entry(self, change_ratio, threshold):
    """Log a 'HOST FLAPPING ALERT ... STOPPED' line.

    ``change_ratio`` and ``threshold`` are percentages, written with one
    decimal, e.g.:
    HOST FLAPPING ALERT: server;STOPPED; Host appears to have stopped flapping (23.0% change < 25.0% threshold)
    """
    # A literal percent sign must be doubled in a %-format string. The
    # previous "%.1f% change < %.1% threshold" made "% c" a char
    # conversion that swallowed the threshold argument.
    logger.log("HOST FLAPPING ALERT: %s;STOPPED; Host appears to have stopped flapping (%.1f%% change < %.1f%% threshold)" % \
               (self.get_name(), change_ratio, threshold))
721 #If there is no valid time for next check, raise a log entry
def raise_no_next_check_log_entry(self):
    """Log a warning saying that no valid time was found for the next check."""
    host_name = self.get_name()
    logger.log("Warning : I cannot schedule the check for the host '%s' because there is not future valid time" % host_name)
726 #Raise a log entry when a downtime begins
727 #HOST DOWNTIME ALERT: test_host_0;STARTED; Host has entered a period of scheduled downtime
def raise_enter_downtime_log_entry(self):
    """Log a HOST DOWNTIME ALERT ... STARTED line for this host."""
    host_name = self.get_name()
    logger.log("HOST DOWNTIME ALERT: %s;STARTED; Host has entered a period of scheduled downtime" % host_name)
733 #Raise a log entry when a downtime has finished
734 #HOST DOWNTIME ALERT: test_host_0;STOPPED; Host has exited from a period of scheduled downtime
def raise_exit_downtime_log_entry(self):
    """Log a HOST DOWNTIME ALERT ... STOPPED line for this host."""
    host_name = self.get_name()
    logger.log("HOST DOWNTIME ALERT: %s;STOPPED; Host has exited from a period of scheduled downtime" % host_name)
#Raise a log entry when a downtime prematurely ends
#HOST DOWNTIME ALERT: test_host_0;CANCELLED; Scheduled downtime for host has been cancelled.
def raise_cancel_downtime_log_entry(self):
    """Log a HOST DOWNTIME ALERT ... CANCELLED line for this host."""
    host_name = self.get_name()
    logger.log("HOST DOWNTIME ALERT: %s;CANCELLED; Scheduled downtime for host has been cancelled." % host_name)
747 #Is stalking ?
748 #Launch if check is waitconsume==first time
749 #and if c.status is in self.stalking_options
def manage_stalking(self, c):
    """Log a 'Stalking' entry for the check c when stalking applies.

    Stalking is only considered when c.status is 'waitconsume'. The exit
    status of the check is mapped to a stalking option (0 -> 'o',
    1 and 2 -> 'd', 3 -> 'u') which must be in self.stalking_options, and
    the entry is dropped when c.output differs from self.output.
    """
    option_by_exit_status = {0: 'o', 1: 'd', 2: 'd', 3: 'u'}
    need_stalk = False
    if c.status == 'waitconsume':
        option = option_by_exit_status.get(c.exit_status)
        # Unknown exit status: stalking_options is not even looked at
        if option is not None and option in self.stalking_options:
            need_stalk = True
        if c.output != self.output:
            need_stalk = False
    if not need_stalk:
        return
    logger.log("Stalking %s : %s" % (self.get_name(), self.output))
#Fill act_depend_of with my parents (so network dependencies)
#and tell the parents that they impact me: no timeperiod, and follow the parents of course
def fill_parents_dependancie(self):
    """Register a 'network_dep' dependency on every parent of this host.

    For each parent that is not None, a dependency tuple is appended to
    self.act_depend_of, then the parent is asked to register this host
    as a child and as a son in its parent/child dependencies.
    """
    for parent in self.parents:
        if parent is None:
            continue
        # (item, failure statuses, kind, timeperiod, inherits parents)
        # NOTE(review): field meanings are presumed from the values - confirm
        dependency = (parent, ['d', 'u', 's', 'f'], 'network_dep', None, True)
        self.act_depend_of.append(dependency)

        # The parent keeps track of me too
        parent.register_child(self)

        # And the parent/child link is also filled, for broking
        parent.register_son_in_parent_child_dependencies(self)
782 # Register a child in our lists
def register_child(self, child):
    """Register child as one of the children of this host.

    Two lists are filled: self.childs only gets the reference to the
    child (used for broking), while self.act_depend_of_me gets the full
    'network_dep' dependency tuple used by the running part.
    """
    self.childs.append(child)
    dependency = (child, ['d', 'u', 's', 'f'], 'network_dep', None, True)
    self.act_depend_of_me.append(dependency)
791 #Give data for checks's macros
def get_data_for_checks(self):
    """Return the objects used to resolve the macros of a check: only this host."""
    macro_sources = [self]
    return macro_sources
795 #Give data for event handler's macro
def get_data_for_event_handler(self):
    """Return the objects used to resolve the macros of an event handler: only this host."""
    macro_sources = [self]
    return macro_sources
799 #Give data for notifications'n macros
def get_data_for_notifications(self, contact, n):
    """Return the objects used to resolve the macros of a notification.

    The list holds, in this order, this host, the contact and the
    notification n.
    """
    macro_sources = [self, contact, n]
    return macro_sources
804 #See if the notification is launchable (time is OK and contact is OK too)
def notification_is_blocked_by_contact(self, n, contact):
    """Tell whether the contact refuses the notification n for this host.

    The contact is asked through want_host_notification(), with the last
    check time, the state and the criticity of this host plus the type
    of n. Return True when the contact does not want the notification.
    """
    wanted = contact.want_host_notification(self.last_chk, self.state, n.type, self.criticity)
    return not wanted
809 #MACRO PART
def get_duration_sec(self):
    """Return self.duration_sec truncated to an integer, as a string."""
    whole_seconds = int(self.duration_sec)
    return str(whole_seconds)
def get_duration(self):
    """Return self.duration_sec formatted as 'HHh MMm SSs'.

    Hours are not folded into days, so they can exceed 23.
    """
    total_minutes, seconds = divmod(self.duration_sec, 60)
    hours, minutes = divmod(total_minutes, 60)
    return "%02dh %02dm %02ds" % (hours, minutes, seconds)
820 #Check if a notification for this host is suppressed at this time
821 #This is a check at the host level. Do not look at contacts here
def notification_is_blocked_by_item(self, type, t_wished = None):
    """Tell whether a notification of this type is suppressed at host level.

    Only the host itself is looked at here, never the contacts.

    type is the notification type ('PROBLEM', 'RECOVERY',
    'ACKNOWLEDGEMENT', 'FLAPPINGSTART', 'DOWNTIMESTART', ...).
    t_wished is the time the notification is wanted for; it defaults to
    the current time.

    Return True when the notification must be blocked, False otherwise.
    """
    if t_wished is None:
        t_wished = time.time()

    # TODO
    # forced notification -> false
    # custom notification -> false

    # Block if notifications are program-wide disabled
    if not self.enable_notifications:
        return True

    # Does the notification period allow sending out this notification?
    # A host without any notification period (None) is not restricted in
    # time, instead of crashing on the missing timeperiod.
    period = self.notification_period
    if period is not None and not period.is_time_valid(t_wished):
        return True

    # Block if notifications are disabled for this host
    if not self.notifications_enabled:
        return True

    options = self.notification_options

    # 'n' (none) in the options blocks everything
    if 'n' in options:
        return True

    # Block if the current state is not in the notification_options d,u,r
    if type in ('PROBLEM', 'RECOVERY'):
        if self.state == 'DOWN' and 'd' not in options:
            return True
        if self.state == 'UP' and 'r' not in options:
            return True
        if self.state == 'UNREACHABLE' and 'u' not in options:
            return True

    # Same thing for the flapping (f) and downtime (s) options
    if (type in ('FLAPPINGSTART', 'FLAPPINGSTOP', 'FLAPPINGDISABLED')
            and 'f' not in options):
        return True
    if (type in ('DOWNTIMESTART', 'DOWNTIMEEND', 'DOWNTIMECANCELLED')
            and 's' not in options):
        return True

    # Acknowledgements make no sense when the status is ok/up
    if type == 'ACKNOWLEDGEMENT':
        if self.state == self.ok_up:
            return True

    # Flapping
    if type in ('FLAPPINGSTART', 'FLAPPINGSTOP', 'FLAPPINGDISABLED'):
        # todo block if not notify_on_flapping
        if self.scheduled_downtime_depth > 0:
            return True

    # When in deep downtime, only allow end-of-downtime notifications
    # In depth 1 the downtime just started and can be notified
    if self.scheduled_downtime_depth > 1 and type not in ('DOWNTIMEEND', 'DOWNTIMECANCELLED'):
        return True

    # Block if in a scheduled downtime and a problem arises
    if self.scheduled_downtime_depth > 0 and type in ('PROBLEM', 'RECOVERY'):
        return True

    # Block if the status is SOFT
    if self.state_type == 'SOFT' and type == 'PROBLEM':
        return True

    # Block if the problem has already been acknowledged
    if self.problem_has_been_acknowledged and type != 'ACKNOWLEDGEMENT':
        return True

    # Block if flapping
    if self.is_flapping:
        return True

    return False
895 #Get a oc*p command if item has obsess_over_*
896 #command. It must be enabled locally and globally
def get_obsessive_compulsive_processor_command(self):
    """Queue the obsessive compulsive host processor (ochp) command.

    Nothing is done unless obsessing is enabled both on the class
    (obsess_over) and on this host (obsess_over_host). Otherwise the
    class ochp_command is resolved with the event handler data, wrapped
    in an EventHandler using the class ochp_timeout, and appended to
    self.actions. Always return None.
    """
    cls = self.__class__
    obsessing = cls.obsess_over and self.obsess_over_host
    if not obsessing:
        return

    resolver = MacroResolver()
    macro_data = self.get_data_for_event_handler()
    command_line = resolver.resolve_command(cls.ochp_command, macro_data)
    handler = EventHandler(command_line, timeout=cls.ochp_timeout)

    # Put it in our temporary action queue
    self.actions.append(handler)
913 class Hosts(Items):
914 name_property = "host_name" #use for the search by name
915 inner_class = Host #use for know what is in items
918 #prepare_for_conf_sending to flatten some properties
def prepare_for_sending(self):
    """Call prepare_for_conf_sending() on every host of the collection."""
    for host in self:
        host.prepare_for_conf_sending()
924 #Create link between elements:
925 #hosts -> timeperiods
926 #hosts -> hosts (parents, etc)
927 #hosts -> commands (check_command)
928 #hosts -> contacts
def linkify(self, timeperiods=None, commands=None, contacts=None, realms=None, resultmodulations=None, escalations=None, hostgroups=None):
    """Replace the names held by the hosts with links to the real objects.

    In this order: timeperiods (notification, check and maintenance
    periods), parent hosts, hostgroups, commands (check command and
    event handler), contacts, realms, result modulations and escalations.
    """
    for period_prop in ('notification_period', 'check_period', 'maintenance_period'):
        self.linkify_with_timeperiods(timeperiods, period_prop)

    self.linkify_h_by_h()
    self.linkify_h_by_hg(hostgroups)

    for command_prop in ('check_command', 'event_handler'):
        self.linkify_one_command_with_commands(commands, command_prop)

    self.linkify_with_contacts(contacts)
    self.linkify_h_by_realms(realms)
    self.linkify_with_resultmodulations(resultmodulations)

    # WARNING: not all escalations are linked here, only the plain
    # escalations (not the service or host escalations). Those are
    # linked in the escalations linkify.
    self.linkify_with_escalations(escalations)
947 #Fill adress by host_name if not set
def fill_predictive_missing_parameters(self):
    """Call fill_predictive_missing_parameters() on every host of the collection."""
    for host in self:
        host.fill_predictive_missing_parameters()
953 #Link host with hosts (parents)
def linkify_h_by_h(self):
    """Replace the parent names of every host with the parent hosts.

    Each name of h.parents is stripped and searched with
    self.find_by_name(). Unknown parents are dropped from the list and
    an error is appended to self.configuration_errors.
    """
    for host in self:
        resolved_parents = []
        for raw_name in host.parents:
            parent_name = raw_name.strip()
            parent = self.find_by_name(parent_name)
            if parent is None:
                message = "Error : the parent '%s' on host '%s' is unknown!" % (parent_name, host.get_name())
                self.configuration_errors.append(message)
                continue
            resolved_parents.append(parent)
        # The names are now replaced with the objects found
        host.parents = resolved_parents
972 #Link with realms and set a default realm if none
def linkify_h_by_realms(self, realms):
    """Replace the realm name of every host with the realm object.

    The default realm is the last realm of realms with a true 'default'
    attribute. A host without realm gets this default realm (even if
    none was found) and is flagged with got_default_realm. A host with
    an unknown realm name gets an error in its configuration_errors and
    its realm is reset to None.
    """
    flagged_default = [r for r in realms if getattr(r, 'default', False)]
    default_realm = None
    if flagged_default:
        default_realm = flagged_default[-1]
    if default_realm is None:
        print("Error : there is no default realm defined!")

    for host in self:
        if host.realm is None:
            # No realm asked: fall back on the default one
            host.realm = default_realm
            host.got_default_realm = True
            continue

        realm = realms.find_by_name(host.realm.strip())
        if realm is None:
            # The message is built before the bad name is dropped
            message = "Error : the host %s got a invalid realm (%s)!" % (host.get_name(), host.realm)
            host.configuration_errors.append(message)
            host.realm = None
        else:
            host.realm = realm
            # One single string: prints the same as the four
            # space-separated items
            print("%s %s %s %s" % ("Host", host.get_name(), "is in the realm", realm.get_name()))
997 #We look for hostgroups property in hosts and
998 #link them
def linkify_h_by_hg(self, hostgroups):
    """Replace the hostgroups string of every host with hostgroup objects.

    Templates are left untouched. For the other hosts the comma
    separated hostgroups property is split, each name is stripped and
    searched with hostgroups.find_by_name(). Unknown names add an error
    to the configuration_errors of the host. The hostgroups property
    finally becomes the list of the hostgroups found (empty when the
    property is missing or is an empty string).
    """
    for host in self:
        if host.is_tpl():
            continue

        found_groups = []
        if hasattr(host, 'hostgroups') and host.hostgroups != '':
            for raw_name in host.hostgroups.split(','):
                group_name = raw_name.strip()
                group = hostgroups.find_by_name(group_name)
                if group is None:
                    message = "Error : the hostgroup '%s' of the host '%s' is unknown" % (group_name, host.host_name)
                    host.configuration_errors.append(message)
                else:
                    found_groups.append(group)
        host.hostgroups = found_groups
1022 #It's used to change old Nagios2 names to
1023 #Nagios3 ones
def old_properties_names_to_new(self):
    """Call old_properties_names_to_new() on every host of the collection.

    Used to change the old Nagios2 property names into the Nagios3 ones.
    """
    for host in self:
        host.old_properties_names_to_new()
#We look for the hostgroups property of the hosts and register each host
#in its hostgroups, then the contact groups are exploded into contacts
def explode(self, hostgroups, contactgroups):
    """Register the hosts in their hostgroups and explode contact groups.

    Every host that is not a template and has a host_name is added, with
    hostgroups.add_member(), to each group of its comma separated
    hostgroups property (names are stripped). Then the contacts of the
    contact groups are put in the contacts property through
    explode_contact_groups_into_contacts().
    """
    for host in self:
        if host.is_tpl() or not hasattr(host, 'host_name'):
            continue
        if not hasattr(host, 'hostgroups'):
            continue
        host_name = host.host_name
        for group_name in host.hostgroups.split(','):
            hostgroups.add_member(host_name, group_name.strip())

    # items::explode_contact_groups_into_contacts
    # takes all contacts from our contact_groups into our contact property
    self.explode_contact_groups_into_contacts(contactgroups)
1051 #Create depenancies:
1052 #Depencies at the host level: host parent
def apply_dependancies(self):
    """Create the host level dependencies: each host fills its parents dependencies."""
    for host in self:
        host.fill_parents_dependancie()
#Parent graph: used to quickly find the relations between all hosts, and the loops
#return True if there is no loop, False if at least one host is in a loop
def no_loop_in_parents(self):
    """Check that the parent relations of the hosts contain no loop.

    A Graph is built with every host as a node and one edge from each
    parent to its child. An error is logged for each host reported by
    the loop_check() of the graph.

    Return True when no host is in a loop, False otherwise.
    """
    parent_graph = Graph()

    # Every host is a node
    for host in self:
        if host is not None:
            parent_graph.add_node(host)

    # Every parent -> child relation is an edge
    for host in self:
        for parent in host.parents:
            if parent is not None:
                parent_graph.add_edge(parent, host)

    # Until a looping host shows up, there is no loop
    loop_free = True
    for host in parent_graph.loop_check():
        logger.log("Error: The host '%s' is part of a circular parent/child chain!" % host.get_name())
        loop_free = False

    return loop_free
#Return a list of the host_name of the hosts
#that use the template with name=tpl_name
def find_hosts_that_use_template(self, tpl_name):
    """Return the host_name of every host that uses the template tpl_name.

    The template is the last item of the collection that is a template
    and whose name attribute equals tpl_name. An empty list is returned
    when there is no such template. Items without host_name are ignored.
    """
    # First find the template; the last match wins
    template = None
    for candidate in self:
        if candidate.is_tpl() and hasattr(candidate, 'name') and candidate.name == tpl_name:
            template = candidate

    # No template, no user (easy case)
    if template is None:
        return []

    return [host.host_name for host in self
            if template in host.templates and hasattr(host, 'host_name')]
1112 # Will create all business tree for the
1113 # services
def create_business_rules(self, hosts, services):
    """Call create_business_rules(hosts, services) on every host of the collection."""
    for host in self:
        host.create_business_rules(hosts, services)
1119 # Will link all business service/host with theirs
1120 # dep for problem/impact link
def create_business_rules_dependencies(self):
    """Call create_business_rules_dependencies() on every host of the collection."""
    for host in self:
        host.create_business_rules_dependencies()