2 # Copyright (C) 2009-2010 :
3 # Gabes Jean, naparuba@gmail.com
4 # Gerhard Lausser, Gerhard.Lausser@consol.de
6 # This file is part of Shinken.
8 # Shinken is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU Affero General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
13 # Shinken is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU Affero General Public License for more details.
18 # You should have received a copy of the GNU Affero General Public License
19 # along with Shinken. If not, see <http://www.gnu.org/licenses/>.
""" This class is the service one, so it manages all service-specific things.
If you are looking for the scheduling part, look at the SchedulingItem class."""
28 from ClusterShell
.NodeSet
import NodeSet
, NodeSetParseRangeError
33 from shinken
.autoslots
import AutoSlots
34 from shinken
.item
import Items
35 from shinken
.schedulingitem
import SchedulingItem
36 from shinken
.util
import to_int
, to_char
, to_split
, to_bool
, to_float
, strip_and_uniq
, format_t_into_dhms_format
, to_svc_hst_distinct_lists
, get_key_value_sequence
, GET_KEY_VALUE_SEQUENCE_ERROR_NOERROR
, GET_KEY_VALUE_SEQUENCE_ERROR_SYNTAX
, GET_KEY_VALUE_SEQUENCE_ERROR_NODEFAULT
, GET_KEY_VALUE_SEQUENCE_ERROR_NODE
, to_list_string_of_names
, expand_with_macros
37 from shinken
.property import UnusedProp
, BoolProp
, IntegerProp
, FloatProp
, CharProp
, StringProp
, ListProp
38 from shinken
.macroresolver
import MacroResolver
39 from shinken
.eventhandler
import EventHandler
40 from shinken
.log
import logger
43 class Service(SchedulingItem
):
44 # AutoSlots create the __slots__ with properties and
45 # running_properties names
46 __metaclass__
= AutoSlots
48 # Every service have a unique ID, and 0 is always special in
51 # The host and service do not have the same 0 value, now yes :)
53 # used by item class for format specific value like for Broks
56 # properties defined by configuration
57 # required : is required in conf
58 # default : default value if no set in conf
59 # pythonize : function to call when transfort string to python object
60 # fill_brok : if set, send to broker. there are two categories:
61 # full_status for initial and update status, check_result for check results
62 # no_slots : do not take this property for __slots__
64 'host_name' : StringProp(fill_brok
=['full_status', 'check_result', 'next_schedule']),
65 'hostgroup_name' : StringProp(default
= '', fill_brok
=['full_status']),
66 'service_description' : StringProp(fill_brok
= ['full_status', 'check_result', 'next_schedule']),
67 'display_name' : StringProp(default
='', fill_brok
=['full_status']),
68 'servicegroups' : StringProp(default
='', fill_brok
=['full_status'], brok_transformation
=to_list_string_of_names
),
69 'is_volatile' : BoolProp(default
='0', fill_brok
=['full_status']),
70 'check_command' : StringProp(fill_brok
= ['full_status']),
71 'initial_state' : CharProp(default
='o', fill_brok
=['full_status']),
72 'max_check_attempts' : IntegerProp(fill_brok
=['full_status']),
73 'check_interval' : IntegerProp(fill_brok
=['full_status']),
74 'retry_interval' : IntegerProp(fill_brok
=['full_status']),
75 'active_checks_enabled' : BoolProp(default
='1', fill_brok
= ['full_status']),
76 'passive_checks_enabled' : BoolProp(default
='1', fill_brok
=['full_status']),
77 'check_period' : StringProp(fill_brok
= ['full_status']),
78 'obsess_over_service' : BoolProp(default
='0', fill_brok
=['full_status']),
79 'check_freshness' : BoolProp(default
='0', fill_brok
=['full_status']),
80 'freshness_threshold' : IntegerProp(default
='0', fill_brok
=['full_status']),
81 'event_handler' : StringProp(default
='', fill_brok
=['full_status']),
82 'event_handler_enabled' : BoolProp(default
='0',fill_brok
=['full_status']),
83 'low_flap_threshold' : IntegerProp(default
='-1', fill_brok
= ['full_status']),
84 'high_flap_threshold' : IntegerProp(default
='-1', fill_brok
=['full_status']),
85 'flap_detection_enabled' : BoolProp(default
='1', fill_brok
=['full_status']),
86 'flap_detection_options' : ListProp(default
='o,w,c,u', fill_brok
=['full_status']),
87 'process_perf_data' : BoolProp(default
='1', fill_brok
=['full_status']),
88 'retain_status_information' : BoolProp(default
='1', fill_brok
=['full_status']),
89 'retain_nonstatus_information' : BoolProp(default
='1', fill_brok
=['full_status']),
90 'notification_interval' : IntegerProp(default
='60', fill_brok
=['full_status']),
91 'first_notification_delay' : IntegerProp(default
='0', fill_brok
=['full_status']),
92 'notification_period' : StringProp(fill_brok
=['full_status']),
93 'notification_options' : ListProp(default
='w,u,c,r,f,s',fill_brok
=['full_status']),
94 'notifications_enabled' : BoolProp(default
='1', fill_brok
=['full_status']),
95 'contacts' : StringProp(fill_brok
=['full_status']),
96 'contact_groups' : StringProp(fill_brok
=['full_status']),
97 'stalking_options' : ListProp(default
='', fill_brok
=['full_status']),
98 'notes' : StringProp(default
='', fill_brok
=['full_status']),
99 'notes_url' : StringProp(default
='', fill_brok
=['full_status']),
100 'action_url' : StringProp(default
='', fill_brok
=['full_status']),
101 'icon_image' : StringProp(default
='', fill_brok
=['full_status']),
102 'icon_image_alt' : StringProp(default
='', fill_brok
=['full_status']),
103 'failure_prediction_enabled' : BoolProp(default
='0', fill_brok
=['full_status']),
104 'parallelize_check' : BoolProp(default
='1', fill_brok
=['full_status']),
107 'poller_tag' : StringProp(default
=None),
109 'resultmodulations' : StringProp(default
=''),
110 'escalations' : StringProp(default
='', fill_brok
=['full_status']),
111 'maintenance_period' : StringProp(default
='', fill_brok
=['full_status']),
114 'duplicate_foreach' : StringProp(default
=''),
115 'default_value' : StringProp(default
=''),
118 'criticity' : IntegerProp(default
='3', fill_brok
=['full_status']),
122 # properties used in the running state
123 running_properties
= {
124 'last_chk' : IntegerProp(default
=0, fill_brok
=['full_status', 'check_result'], retention
=True),
125 'next_chk' : IntegerProp(default
=0, fill_brok
=['full_status', 'next_schedule']),
126 'in_checking' : BoolProp(default
=False, fill_brok
=['full_status', 'check_result', 'next_schedule'], retention
=True),
127 'latency' : FloatProp(default
=0, fill_brok
=['full_status', 'check_result'], retention
=True,),
128 'attempt' : IntegerProp(default
=0, fill_brok
=['full_status', 'check_result'],retention
=True),
129 'state' : StringProp(default
='PENDING', fill_brok
=['full_status'], retention
=True),
130 'state_id' : IntegerProp(default
=0, fill_brok
=['full_status', 'check_result'], retention
=True),
131 'current_event_id' : IntegerProp(default
=0, fill_brok
=['full_status', 'check_result'], retention
=True),
132 'last_event_id' : IntegerProp(default
=0, fill_brok
=['full_status', 'check_result'], retention
=True),
133 'last_state' : StringProp(default
='PENDING', fill_brok
=['full_status'], retention
=True),
134 'last_state_id' : IntegerProp(default
=0, fill_brok
=['full_status'], retention
=True),
135 'last_state_change' : FloatProp(default
=time
.time(), fill_brok
=['full_status'], retention
=True),
136 'last_hard_state_change' : FloatProp(default
=time
.time(), fill_brok
=['full_status'], retention
=True),
137 'last_hard_state' : StringProp(default
='PENDING', fill_brok
=['full_status'], retention
=True),
138 'last_hard_state_id' : IntegerProp(default
=0, fill_brok
=['full_status'], retention
=True),
139 'last_time_ok' : IntegerProp(default
=int(time
.time()), fill_brok
=['full_status', 'check_result'], retention
=True),
140 'last_time_warning' : IntegerProp(default
=int(time
.time()), fill_brok
= ['full_status', 'check_result'], retention
=True),
141 'last_time_critical' : IntegerProp(default
=int(time
.time()), fill_brok
=['full_status', 'check_result'], retention
=True),
142 'last_time_unknown' : IntegerProp(default
=int(time
.time()), fill_brok
=['full_status', 'check_result'], retention
=True),
143 'duration_sec' : IntegerProp(default
=0, fill_brok
=['full_status'], retention
=True),
144 'state_type' : StringProp(default
='HARD', fill_brok
=['full_status'], retention
=True),
145 'state_type_id' : IntegerProp(default
=0, fill_brok
=['full_status', 'check_result'], retention
=True),
146 'output' : StringProp(default
='', fill_brok
=['full_status', 'check_result'], retention
=True),
147 'long_output' : StringProp(default
='', fill_brok
=['full_status', 'check_result'], retention
=True),
148 'is_flapping' : BoolProp(default
=False, fill_brok
=['full_status'], retention
=True),
149 # dependencies for actions like notif of event handler,
150 # so AFTER check return
151 'act_depend_of' : ListProp(default
=[]),
152 # dependencies for checks raise, so BEFORE checks
153 'chk_depend_of' : ListProp(default
=[]),
154 # elements that depend of me, so the reverse than just uppper
155 'act_depend_of_me' : ListProp(default
=[]),
156 # elements that depend of me
157 'chk_depend_of_me' : ListProp(default
=[]),
159 'last_state_update' : FloatProp(default
=time
.time(), fill_brok
=['full_status'], retention
=True),
160 'checks_in_progress' : ListProp(default
=[]), # no brok because checks are too linked
161 'notifications_in_progress' : ListProp(default
={}, retention
=True), # no broks because notifications are too linked
162 'downtimes' : ListProp(default
=[], fill_brok
=['full_status'], retention
=True),
163 'comments' : ListProp(default
=[], fill_brok
=['full_status'], retention
=True),
164 'flapping_changes' : ListProp(default
=[], fill_brok
=['full_status'], retention
=True),
165 'flapping_comment_id' : IntegerProp(default
=0, fill_brok
=['full_status'], retention
=True),
166 'percent_state_change' : FloatProp(default
=0.0, fill_brok
=['full_status'], retention
=True),
167 'problem_has_been_acknowledged' : BoolProp(default
=False, fill_brok
=['full_status'], retention
=True),
168 'acknowledgement' : StringProp(default
=None, retention
=True),
169 'acknowledgement_type' : IntegerProp(default
=1, fill_brok
=['full_status', 'check_result'], retention
=True),
170 'check_type' : IntegerProp(default
=0, fill_brok
=['full_status', 'check_result'], retention
=True),
171 'has_been_checked' : IntegerProp(default
=0, fill_brok
=['full_status', 'check_result'], retention
=True),
172 'should_be_scheduled' : IntegerProp(default
=1, fill_brok
=['full_status'], retention
=True),
173 'last_problem_id' : IntegerProp(default
=0, fill_brok
=['full_status', 'check_result'], retention
=True),
174 'current_problem_id' : IntegerProp(default
=0, fill_brok
=['full_status', 'check_result'], retention
=True),
175 'execution_time' : FloatProp(default
=0.0, fill_brok
=['full_status', 'check_result'], retention
=True),
176 'last_notification' : FloatProp(default
=time
.time(), fill_brok
=['full_status'], retention
=True),
177 'current_notification_number' : IntegerProp(default
=0, fill_brok
=['full_status'],retention
=True),
178 'current_notification_id' : IntegerProp(default
=0, fill_brok
=['full_status'], retention
=True),
179 'check_flapping_recovery_notification' : BoolProp(default
=True, fill_brok
=['full_status'], retention
=True),
180 'scheduled_downtime_depth' : IntegerProp(default
=0, fill_brok
=['full_status'], retention
=True),
181 'pending_flex_downtime' : IntegerProp(default
=0, fill_brok
=['full_status'], retention
=True),
182 'timeout' : IntegerProp(default
=0, fill_brok
=['full_status', 'check_result'], retention
=True),
183 'start_time' : IntegerProp(default
=0, fill_brok
=['full_status', 'check_result'], retention
=True),
184 'end_time' : IntegerProp(default
=0, fill_brok
=['full_status', 'check_result'], retention
=True),
185 'early_timeout' : IntegerProp(default
=0, fill_brok
=['full_status', 'check_result'], retention
=True),
186 'return_code' : IntegerProp(default
=0, fill_brok
=['full_status', 'check_result'], retention
=True),
187 'perf_data' : StringProp(default
='', fill_brok
=['full_status', 'check_result'], retention
=True),
188 'last_perf_data' : StringProp(default
='', retention
=True),
189 'host' : StringProp(default
=None),
190 'customs' : ListProp(default
={}, fill_brok
=['full_status']),
191 'notified_contacts' : ListProp(default
=set()), # use for having all contacts we have notified
192 'in_scheduled_downtime' : BoolProp(default
=False, retention
=True),
193 'in_scheduled_downtime_during_last_check' : BoolProp(default
=False, retention
=True),
194 'actions' : ListProp(default
=[]), #put here checks and notif raised
195 'broks' : ListProp(default
=[]), #and here broks raised
197 # All errors and warning raised during the configuration parsing
198 # and taht will raised real warning/errors during the is_correct
199 'configuration_warnings' : ListProp(default
=[]),
200 'configuration_errors' : ListProp(default
=[]),
202 # Problem/impact part
203 'is_problem' : BoolProp(default
=False, fill_brok
=['full_status']),
204 'is_impact' : BoolProp(default
=False, fill_brok
=['full_status']),
205 # the save value of our criticity for "problems"
206 'my_own_criticity': IntegerProp(default
=-1),
207 # list of problems that make us an impact
208 'source_problems' : ListProp(default
=[], fill_brok
=['full_status'], brok_transformation
=to_svc_hst_distinct_lists
),
209 # list of the impact I'm the cause of
210 'impacts' : ListProp( default
=[], fill_brok
=['full_status'], brok_transformation
=to_svc_hst_distinct_lists
),
211 # keep a trace of the old state before being an impact
212 'state_before_impact' : StringProp(default
='PENDING'),
213 # keep a trace of the old state id before being an impact
214 'state_id_before_impact' : IntegerProp(default
=0),
215 # if the state change, we know so we do not revert it
216 'state_changed_since_impact' : BoolProp(default
=False),
218 # Easy Service dep definition
219 'service_dependencies' : ListProp(default
=''),# TODO : find a way to brok it?
221 #BUSINESS CORRELATOR PART
222 # Say if we are business based rule or not
223 'got_business_rule' : BoolProp(default
=False),
224 # Our Dependency node for the business rule
225 'business_rule' : StringProp(default
=None),
229 # Mapping between Macros and properties (can be prop or a function)
231 'SERVICEDESC' : 'service_description',
232 'SERVICEDISPLAYNAME' : 'display_name',
233 'SERVICESTATE' : 'state',
234 'SERVICESTATEID' : 'state_id',
235 'LASTSERVICESTATE' : 'last_state',
236 'LASTSERVICESTATEID' : 'last_state_id',
237 'SERVICESTATETYPE' : 'state_type',
238 'SERVICEATTEMPT' : 'attempt',
239 'MAXSERVICEATTEMPTS' : 'max_check_attempts',
240 'SERVICEISVOLATILE' : 'is_volatile',
241 'SERVICEEVENTID' : 'current_event_id',
242 'LASTSERVICEEVENTID' : 'last_event_id',
243 'SERVICEPROBLEMID' : 'current_problem_id',
244 'LASTSERVICEPROBLEMID' : 'last_problem_id',
245 'SERVICELATENCY' : 'latency',
246 'SERVICEEXECUTIONTIME' : 'execution_time',
247 'SERVICEDURATION' : 'get_duration',
248 'SERVICEDURATIONSEC' : 'get_duration_sec',
249 'SERVICEDOWNTIME' : 'get_downtime',
250 'SERVICEPERCENTCHANGE' : 'percent_state_change',
251 'SERVICEGROUPNAME' : 'get_groupname',
252 'SERVICEGROUPNAMES' : 'get_groupnames',
253 'LASTSERVICECHECK' : 'last_chk',
254 'LASTSERVICESTATECHANGE' : 'last_state_change',
255 'LASTSERVICEOK' : 'last_time_ok',
256 'LASTSERVICEWARNING' : 'last_time_warning',
257 'LASTSERVICEUNKNOWN' : 'last_time_unknown',
258 'LASTSERVICECRITICAL' : 'last_time_critical',
259 'SERVICEOUTPUT' : 'output',
260 'LONGSERVICEOUTPUT' : 'long_output',
261 'SERVICEPERFDATA' : 'perf_data',
262 'LASTSERVICEPERFDATA' : 'last_perf_data',
263 'SERVICECHECKCOMMAND' : 'get_check_command',
264 'SERVICEACKAUTHOR' : 'get_ack_author_name',
265 'SERVICEACKAUTHORNAME' : 'get_ack_author_name',
266 'SERVICEACKAUTHORALIAS' : 'get_ack_author_name',
267 'SERVICEACKCOMMENT' : 'get_ack_comment',
268 'SERVICEACTIONURL' : 'action_url',
269 'SERVICENOTESURL' : 'notes_url',
270 'SERVICENOTES' : 'notes'
273 # This tab is used to transform old parameters name into new ones
274 # so from Nagios2 format, to Nagios3 ones
276 'normal_check_interval' : 'check_interval',
277 'retry_check_interval' : 'retry_interval'
281 # Give a nice name output
283 if not self
.is_tpl():
284 return self
.service_description
288 # Get the servicegroups names
def get_groupnames(self):
    """Return the servicegroup names joined into one comma-separated string.

    Each element of self.servicegroups is asked for its name through
    get_name(); an empty collection gives an empty string.
    """
    names = []
    for group in self.servicegroups:
        names.append(group.get_name())
    return ','.join(names)
293 # Need the whole name for debugin purpose
def get_dbg_name(self):
    """Return "<host_name>/<service_description>" for debugging output.

    The host part is read from self.host.host_name, so self.host must
    already be an object exposing host_name.
    """
    host_name = self.host.host_name
    description = self.service_description
    return "%s/%s" % (host_name, description)
298 # Call by picle for dataify service
299 # we do a dict because list are too dangerous for
300 # retention save and co :( even if it's more
302 # The setstate function do the inverse
303 def __getstate__(self
):
304 # print "Asking a getstate for service", self.get_dbg_name()
306 # id is not in *_properties
307 res
= {'id' : self
.id}
308 for prop
in cls
.properties
:
309 if hasattr(self
, prop
):
310 res
[prop
] = getattr(self
, prop
)
311 for prop
in cls
.running_properties
:
312 if hasattr(self
, prop
):
313 res
[prop
] = getattr(self
, prop
)
318 # Inversed funtion of getstate
319 def __setstate__(self
, state
):
321 self
.id = state
['id']
322 for prop
in cls
.properties
:
324 setattr(self
, prop
, state
[prop
])
325 for prop
in cls
.running_properties
:
327 setattr(self
, prop
, state
[prop
])
330 # Check is required prop are set:
331 # template are always correct
332 # contacts OR contactgroups is need
333 def is_correct(self
):
334 state
= True # guilty or not? :)
337 special_properties
= ['contacts', 'contact_groups', 'check_period', \
338 'notification_interval', 'host_name', \
340 for prop
in cls
.properties
:
341 if prop
not in special_properties
:
342 if not hasattr(self
, prop
) and cls
.properties
[prop
].required
:
343 logger
.log('%s : I do not have %s' % (self
.get_name(), prop
))
344 state
= False # Bad boy...
346 # Raised all previously saw errors like unknown contacts and co
347 if self
.configuration_errors
!= []:
349 for err
in self
.configuration_errors
:
352 # Ok now we manage special cases...
353 if not hasattr(self
, 'contacts') \
354 and not hasattr(self
, 'contact_groups') \
355 and self
.notifications_enabled
== True:
356 logger
.log('%s : I do not have contacts nor contact_groups' % self
.get_name())
358 if not hasattr(self
, 'check_command'):
359 logger
.log("%s : I've got no check_command" % self
.get_name())
361 # Ok got a command, but maybe it's invalid
363 if not self
.check_command
.is_valid():
364 logger
.log("%s : my check_command %s is invalid" % (self
.get_name(), self
.check_command
.command
))
366 if not hasattr(self
, 'notification_interval') \
367 and self
.notifications_enabled
== True:
368 logger
.log("%s : I've got no notification_interval but I've got notifications enabled" % self
.get_name())
370 if not hasattr(self
, 'host') or self
.host
== None:
371 logger
.log("%s : I do not have an host" % self
.get_name())
373 if not hasattr(self
, 'check_period'):
374 self
.check_period
= None
375 if hasattr(self
, 'service_description'):
376 for c
in cls
.illegal_object_name_chars
:
377 if c
in self
.service_description
:
378 logger
.log("%s : My service_description got the caracter %s that is not allowed." % (self
.get_name(), c
))
384 # The service is dependent of his father dep
385 # Must be AFTER linkify
386 def fill_daddy_dependancy(self
):
387 # Depend of host, all status, is a networkdep
388 # and do not have timeperiod, and folow parents dep
389 if self
.host
is not None:
390 # I add the dep in MY list
391 self
.act_depend_of
.append( (self
.host
,
392 ['d', 'u', 's', 'f'],
395 # I add the dep in Daddy list
396 self
.host
.act_depend_of_me
.append( (self
,
397 ['d', 'u', 's', 'f'],
402 # Register the dependancy between 2 service for action (notification etc)
def add_service_act_dependancy(self, srv, status, timeperiod, inherits_parent):
    """Register an action dependency between this service and srv.

    A 'logic_dep' tuple pointing at srv is appended to self.act_depend_of,
    and the mirror tuple pointing at self is appended to
    srv.act_depend_of_me.
    """
    # Both sides of the link share everything except the first element
    shared = (status, 'logic_dep', timeperiod, inherits_parent)
    # What I depend on goes in MY list...
    self.act_depend_of.append((srv,) + shared)
    # ...and I register myself in the other service's reverse list
    srv.act_depend_of_me.append((self,) + shared)
412 # Register the dependancy between 2 service for action (notification etc)
413 # but based on a BUSINESS rule, so on fact:
414 # ERP depend on database, so we fill just database.act_depend_of_me
415 # because we will want ERP mails to go on! So call this
416 # on the database service with the srv=ERP service
def add_business_rule_act_dependancy(self, srv, status, timeperiod, inherits_parent):
    """Register srv in self.act_depend_of_me as a 'business_dep' link.

    Only the reverse side is recorded: nothing is added to any
    act_depend_of list, just the tuple in self.act_depend_of_me.
    A trace line naming both services is printed to stdout first.
    """
    asker = srv.get_name()
    me = self.get_name()
    print("%s %s %s %s" % (asker, "is asking to me", me, "to add him in my act_depend_of_me list"))
    # I only register the other side, so it is known that I WILL be an impact
    link = (srv, status, 'business_dep', timeperiod, inherits_parent)
    self.act_depend_of_me.append(link)
427 # Register the dependancy between 2 service for checks
def add_service_chk_dependancy(self, srv, status, timeperiod, inherits_parent):
    """Register a check dependency between this service and srv.

    A 'logic_dep' tuple pointing at srv is appended to self.chk_depend_of,
    and the mirror tuple pointing at self is appended to
    srv.chk_depend_of_me.
    """
    # Both sides of the link share everything except the first element
    shared = (status, 'logic_dep', timeperiod, inherits_parent)
    # What I depend on goes in MY list...
    self.chk_depend_of.append((srv,) + shared)
    # ...and I register myself in the other service's reverse list
    srv.chk_depend_of_me.append((self,) + shared)
437 # Set unreachable : our host is DOWN, but it mean nothing for a service
438 def set_unreachable(self
):
442 # We just go an impact, so we go unreachable
443 # but only if it's enable in the configuration
444 def set_impact_state(self
):
446 if cls
.enable_problem_impacts_states_change
:
447 # Keep a trace of the old state (problem came back before
449 self
.state_before_impact
= self
.state
450 self
.state_id_before_impact
= self
.state_id
451 # this flag will know if we overide the impact state
452 self
.state_changed_since_impact
= False
453 self
.state
= 'UNKNOWN'# exit code UNDETERMINED
457 # Ok, we are no more an impact, if no news checks
458 # overide the impact state, we came back to old
460 # And only if we enable the state change for impacts
461 def unset_impact_state(self
):
463 if cls
.enable_problem_impacts_states_change
and not self
.state_changed_since_impact
:
464 self
.state
= self
.state_before_impact
465 self
.state_id
= self
.state_id_before_impact
468 # Set state with status return by the check
469 # and update flapping state
470 def set_state_from_exit_status(self
, status
):
472 self
.last_state_update
= now
475 # we should put in last_state the good last state:
476 # if not just change the state by an problem/impact
477 # we can take current state. But if it's the case, the
478 # real old state is self.state_before_impact (it's teh TRUE
480 # but only if the global conf have enable the impact state change
482 if cls
.enable_problem_impacts_states_change \
484 and not self
.state_changed_since_impact
:
485 self
.last_state
= self
.state_before_impact
486 else: # standard case
487 self
.last_state
= self
.state
492 self
.last_time_ok
= int(self
.last_state_update
)
495 self
.state
= 'WARNING'
497 self
.last_time_warning
= int(self
.last_state_update
)
500 self
.state
= 'CRITICAL'
502 self
.last_time_critical
= int(self
.last_state_update
)
505 self
.state
= 'UNKNOWN'
507 self
.last_time_unknown
= int(self
.last_state_update
)
510 self
.state
= 'CRITICAL'# exit code UNDETERMINED
512 self
.last_time_critical
= int(self
.last_state_update
)
515 if state_code
in self
.flap_detection_options
:
516 self
.add_flapping_change(self
.state
!= self
.last_state
)
518 if self
.state
!= self
.last_state
:
519 self
.last_state_change
= self
.last_state_update
521 self
.duration_sec
= now
- self
.last_state_change
524 # Return True if status is the state (like OK) or small form like 'o'
525 def is_state(self
, status
):
526 if status
== self
.state
:
529 elif status
== 'o' and self
.state
== 'OK':
531 elif status
== 'c' and self
.state
== 'CRITICAL':
533 elif status
== 'w' and self
.state
== 'WARNING':
535 elif status
== 'u' and self
.state
== 'UNKNOWN':
540 # The last time when the state was not OK
541 def last_time_non_ok_or_up(self
):
542 non_ok_times
= filter(lambda x
: x
> self
.last_time_ok
, [self
.last_time_warning
,
543 self
.last_time_critical
,
544 self
.last_time_unknown
])
545 if len(non_ok_times
) == 0:
546 last_time_non_ok
= 0 # program_start would be better
548 last_time_non_ok
= min(non_ok_times
)
549 return last_time_non_ok
552 # Add a log entry with a SERVICE ALERT like:
553 # SERVICE ALERT: server;Load;UNKNOWN;HARD;1;I don't know what to say...
554 def raise_alert_log_entry(self
):
555 logger
.log('SERVICE ALERT: %s;%s;%s;%s;%d;%s' % (self
.host
.get_name(),
563 # Add a log entry with a Freshness alert like:
# Warning: The results of service 'Load' on host 'Server' are stale by 0d 0h 0m 58s (threshold=0d 1h 0m 0s).
# I'm forcing an immediate check of the service.
def raise_freshness_log_entry(self, t_stale_by, t_threshold):
    """Log a warning that this service's results are stale.

    t_stale_by and t_threshold are both rendered through
    format_t_into_dhms_format before being placed in the message.
    """
    service_name = self.get_name()
    host_name = self.host.get_name()
    stale_by = format_t_into_dhms_format(t_stale_by)
    limit = format_t_into_dhms_format(t_threshold)
    message = "Warning: The results of service '%s' on host '%s' are stale by %s (threshold=%s). I'm forcing an immediate check of the service." % (
        service_name, host_name, stale_by, limit)
    logger.log(message)
571 # Raise a log entry with a Notification alert like
572 # SERVICE NOTIFICATION: superadmin;server;Load;OK;notify-by-rss;no output
573 def raise_notification_log_entry(self
, n
):
575 command
= n
.command_call
576 if n
.type in ('DOWNTIMESTART', 'DOWNTIMEEND', 'DOWNTIMECANCELLED',
577 'CUSTOM', 'ACKNOWLEDGEMENT', 'FLAPPINGSTART',
578 'FLAPPINGSTOP', 'FLAPPINGDISABLED'):
579 state
= '%s (%s)' % (n
.type, self
.state
)
582 if self
.__class
__.log_notifications
:
583 logger
.log("SERVICE NOTIFICATION: %s;%s;%s;%s;%s;%s" % (contact
.get_name(),
584 self
.host
.get_name(),
585 self
.get_name(), state
,
586 command
.get_name(), self
.output
))
589 # Raise a log entry with a Eventhandler alert like
590 # SERVICE EVENT HANDLER: test_host_0;test_ok_0;OK;SOFT;4;eventhandler
591 def raise_event_handler_log_entry(self
, command
):
592 if self
.__class
__.log_event_handlers
:
593 logger
.log("SERVICE EVENT HANDLER: %s;%s;%s;%s;%s;%s" % (self
.host
.get_name(),
601 # Raise a log entry with FLAPPING START alert like
602 # SERVICE FLAPPING ALERT: server;LOAD;STARTED; Service appears to have started flapping (50.6% change >= 50.0% threshold)
def raise_flapping_start_log_entry(self, change_ratio, threshold):
    """Log a SERVICE FLAPPING ALERT ... STARTED entry.

    change_ratio and threshold are each rendered with one decimal and
    followed by a literal percent sign, e.g.
    "(50.6% change >= 50.0% threshold)".
    """
    # '%%' produces a literal percent sign. A lone '%' after the number is
    # parsed as the start of a new conversion ('% c', '%.1%'), which breaks
    # the formatting of this message.
    message = "SERVICE FLAPPING ALERT: %s;%s;STARTED; Service appears to have started flapping (%.1f%% change >= %.1f%% threshold)" % (
        self.host.get_name(), self.get_name(), change_ratio, threshold)
    logger.log(message)
608 # Raise a log entry with FLAPPING STOP alert like
609 # SERVICE FLAPPING ALERT: server;LOAD;STOPPED; Service appears to have stopped flapping (23.0% change < 25.0% threshold)
def raise_flapping_stop_log_entry(self, change_ratio, threshold):
    """Log a SERVICE FLAPPING ALERT ... STOPPED entry.

    change_ratio and threshold are each rendered with one decimal and
    followed by a literal percent sign, e.g.
    "(23.0% change < 25.0% threshold)".
    """
    # '%%' produces a literal percent sign. A lone '%' after the number is
    # parsed as the start of a new conversion ('% c', '%.1%'), which breaks
    # the formatting of this message.
    message = "SERVICE FLAPPING ALERT: %s;%s;STOPPED; Service appears to have stopped flapping (%.1f%% change < %.1f%% threshold)" % (
        self.host.get_name(), self.get_name(), change_ratio, threshold)
    logger.log(message)
615 # If there is no valid time for next check, raise a log entry
def raise_no_next_check_log_entry(self):
    """Log a warning that no future valid time exists to schedule this service's check."""
    service_name = self.get_name()
    host_name = self.host.get_name()
    message = "Warning : I cannot schedule the check for the service '%s' on host '%s' because there is not future valid time" % (
        service_name, host_name)
    logger.log(message)
621 # Raise a log entry when a downtime begins
622 # SERVICE DOWNTIME ALERT: test_host_0;test_ok_0;STARTED; Service has entered a period of scheduled downtime
def raise_enter_downtime_log_entry(self):
    """Log a SERVICE DOWNTIME ALERT ... STARTED entry for this service."""
    host_name = self.host.get_name()
    service_name = self.get_name()
    message = "SERVICE DOWNTIME ALERT: %s;%s;STARTED; Service has entered a period of scheduled downtime" % (
        host_name, service_name)
    logger.log(message)
628 # Raise a log entry when a downtime has finished
629 # SERVICE DOWNTIME ALERT: test_host_0;test_ok_0;STOPPED; Service has exited from a period of scheduled downtime
def raise_exit_downtime_log_entry(self):
    """Log a SERVICE DOWNTIME ALERT ... STOPPED entry for this service."""
    host_name = self.host.get_name()
    service_name = self.get_name()
    message = "SERVICE DOWNTIME ALERT: %s;%s;STOPPED; Service has exited from a period of scheduled downtime" % (
        host_name, service_name)
    logger.log(message)
635 # Raise a log entry when a downtime prematurely ends
# SERVICE DOWNTIME ALERT: test_host_0;test_ok_0;CANCELLED; Scheduled downtime for service has been cancelled.
def raise_cancel_downtime_log_entry(self):
    """Log a SERVICE DOWNTIME ALERT ... CANCELLED entry for this service."""
    host_name = self.host.get_name()
    service_name = self.get_name()
    message = "SERVICE DOWNTIME ALERT: %s;%s;CANCELLED; Scheduled downtime for service has been cancelled." % (
        host_name, service_name)
    logger.log(message)
643 # Launch if check is waitconsume==first time
644 # and if c.status is in self.stalking_options
645 def manage_stalking(self
, c
):
647 if c
.status
== 'waitconsume':
648 if c
.exit_status
== 0 and 'o' in self
.stalking_options
:
650 elif c
.exit_status
== 1 and 'w' in self
.stalking_options
:
652 elif c
.exit_status
== 2 and 'c' in self
.stalking_options
:
654 elif c
.exit_status
== 3 and 'u' in self
.stalking_options
:
656 if c
.output
== self
.output
:
659 logger
.log("Stalking %s : %s" % (self
.get_name(), c
.output
))
662 # Give data for checks's macros
def get_data_for_checks(self):
    """Return the objects handed to macro resolution for checks: [host, service]."""
    host = self.host
    return [host, self]
# Give data for event handlers' macros
def get_data_for_event_handler(self):
    """Return the objects handed to macro resolution for event handlers: [host, service]."""
    host = self.host
    return [host, self]
# Give data for notifications' macros
def get_data_for_notifications(self, contact, n):
    """Return the objects handed to macro resolution for notifications.

    The list is [host, service, contact, notification], in that order.
    """
    host = self.host
    return [host, self, contact, n]
677 # See if the notification is launchable (time is OK and contact is OK too)
def notification_is_blocked_by_contact(self, n, contact):
    """Return True when the contact does not want this notification.

    The decision is delegated to contact.want_service_notification, called
    with the service's last_chk, state, the notification type and the
    service's criticity; its answer is negated.
    """
    wanted = contact.want_service_notification(
        self.last_chk, self.state, n.type, self.criticity)
    return not wanted
def get_duration_sec(self):
    """Return self.duration_sec truncated to an integer, as a string."""
    whole_seconds = int(self.duration_sec)
    return str(whole_seconds)
686 def get_duration(self
):
687 m
, s
= divmod(self
.duration_sec
, 60)
689 return "%02dh %02dm %02ds" % (h
, m
, s
)
692 def get_ack_author_name(self
):
693 if self
.acknowledgement
== None:
695 return self
.acknowledgement
.author
697 def get_ack_comment(self
):
698 if self
.acknowledgement
== None:
700 return self
.acknowledgement
.comment
def get_check_command(self):
    """Return the name of this service's check command, via its get_name()."""
    command = self.check_command
    return command.get_name()
# Check if a notification for this service is suppressed at this time
def notification_is_blocked_by_item(self, type, t_wished=None):
    """Tell whether a notification of kind ``type`` must be suppressed.

    ``type`` is the notification kind ('PROBLEM', 'RECOVERY',
    'ACKNOWLEDGEMENT', 'FLAPPINGSTART', 'DOWNTIMEEND', ...).
    ``t_wished`` is the time checked against the notification period;
    it defaults to the current time.

    Returns True when the notification is blocked, False when it may be
    sent. The checks run in a fixed order and the first match wins.
    """
    if t_wished is None:
        t_wished = time.time()

    # TODO
    # forced notification
    # pass if this is a custom notification

    # Block if notifications are program-wide disabled
    if not self.enable_notifications:
        return True

    # Does the notification period allow sending out this notification?
    if not self.notification_period.is_time_valid(t_wished):
        return True

    # Block if notifications are disabled for this service
    if not self.notifications_enabled:
        return True

    # Block if the current status is in the notification_options w,u,c,r,f,s
    options = self.notification_options
    if 'n' in options:
        return True
    if type in ('PROBLEM', 'RECOVERY'):
        # Each state is only notified when its option letter is enabled
        letter_for_state = {'UNKNOWN': 'u', 'WARNING': 'w',
                            'CRITICAL': 'c', 'OK': 'r'}
        letter = letter_for_state.get(self.state)
        if letter is not None and letter not in options:
            return True
    if (type in ('FLAPPINGSTART', 'FLAPPINGSTOP', 'FLAPPINGDISABLED')
            and 'f' not in options):
        return True
    if (type in ('DOWNTIMESTART', 'DOWNTIMEEND', 'DOWNTIMECANCELLED')
            and 's' not in options):
        return True

    # Acknowledgements make no sense when the status is ok/up
    if type == 'ACKNOWLEDGEMENT' and self.state == self.ok_up:
        return True

    # When in downtime, only allow end-of-downtime notifications
    if (self.scheduled_downtime_depth > 1
            and type not in ('DOWNTIMEEND', 'DOWNTIMECANCELLED')):
        return True

    # Block if host is in a scheduled downtime
    if self.host.scheduled_downtime_depth > 0:
        return True

    # Block if in a scheduled downtime and a problem arises
    if self.scheduled_downtime_depth > 0 and type in ('PROBLEM', 'RECOVERY'):
        return True

    # Block if the status is SOFT
    if self.state_type == 'SOFT' and type == 'PROBLEM':
        return True

    # Block if the problem has already been acknowledged
    if self.problem_has_been_acknowledged and type != 'ACKNOWLEDGEMENT':
        return True

    # Block if flapping
    # NOTE(review): this check sits in a gap of the visible text and was
    # restored from context -- confirm against the canonical file.
    if self.is_flapping:
        return True

    # Block if host is down
    if self.host.state != self.host.ok_up:
        return True

    return False
# Get a oc*p command if item has obsess_over_*
# command. It must be enabled locally and globally
def get_obsessive_compulsive_processor_command(self):
    """Queue the ocsp command of this service in ``self.actions``.

    Nothing is queued unless obsessing is enabled both on the class
    (``obsess_over``) and on the service (``obsess_over_service``).
    The class-level ``ocsp_command`` is resolved with the data returned
    by ``get_data_for_event_handler()`` and wrapped in an EventHandler
    using the class-level ``ocsp_timeout``. Returns None.
    """
    cls = self.__class__
    if not cls.obsess_over or not self.obsess_over_service:
        return

    resolver = MacroResolver()
    data = self.get_data_for_event_handler()
    cmd = resolver.resolve_command(cls.ocsp_command, data)
    handler = EventHandler(cmd, timeout=cls.ocsp_timeout)

    # ok we can put it in our temp action queue
    self.actions.append(handler)
def duplicate(self, host):
    """Generate the services derived from this one for ``host``.

    The custom property of ``host`` named by ``self.duplicate_foreach``
    (upper-cased) is parsed by ``get_key_value_sequence``; one copy of
    this service is produced per parsed entry, with every ``$<key>$``
    placeholder replaced in ``check_command`` and ``$KEY$`` replaced in
    ``service_description``.

    Returns the list of new services. The list is empty when the host
    lacks the property or when parsing yields nothing; in the latter
    case the reason is appended to ``self.configuration_errors``.
    """
    duplicates = []

    # In macro, it's all in UPPER case
    prop = self.duplicate_foreach.strip().upper()
    # If the host does not have the property, we bail out
    if prop not in host.customs:
        return duplicates

    entry = host.customs[prop]
    default_value = getattr(self, 'default_value', None)
    # Transform the generator string to a list
    # Missing values are filled with the default value
    (key_values, errcode) = get_key_value_sequence(entry, default_value)

    if key_values:
        for key_value in key_values:
            new_s = self.copy()
            new_s.host_name = host.get_name()
            if self.is_tpl():  # if template, the new one is not
                new_s.register = 1
            for key in key_value:
                macro = '$' + key + '$'
                if key == 'KEY':
                    if hasattr(self, 'service_description'):
                        new_s.service_description = self.service_description.replace(macro, key_value[key])
                if hasattr(self, 'check_command'):
                    # here we can replace VALUE, VALUE1, VALUE2,...
                    new_s.check_command = new_s.check_command.replace(macro, key_value[key])
            # And then add in our list this new service
            duplicates.append(new_s)
    else:
        prop_name = self.duplicate_foreach.strip()
        if errcode == GET_KEY_VALUE_SEQUENCE_ERROR_SYNTAX:
            err = "The custom property '%s' of the host '%s' is not a valid entry %s for a service generator" % (prop_name, host.get_name(), entry)
            self.configuration_errors.append(err)
        elif errcode == GET_KEY_VALUE_SEQUENCE_ERROR_NODEFAULT:
            err = "The custom property '%s 'of the host '%s' has empty values %s but the service %s has no default_value" % (prop_name, host.get_name(), entry, self.service_description)
            self.configuration_errors.append(err)
        elif errcode == GET_KEY_VALUE_SEQUENCE_ERROR_NODE:
            # Three placeholders, three arguments: a fourth argument
            # here makes the % operator raise TypeError instead of
            # recording the configuration error.
            err = "The custom property '%s 'of the host '%s' has an invalid node range %s" % (prop_name, host.get_name(), entry)
            self.configuration_errors.append(err)

    return duplicates
846 class Services(Items
):
847 inner_class
= Service
# used to know what is in items
# Create the reversed list for speedup search by host_name/name
# We also tag services already in the list: they are twins, which is a
# bad thing. Hostgroup services have a higher id than host services, so
# when a (host_name, service_description) key is already present, the
# later service duplicates one defined on a host or on a previously
# defined hostgroup.
def create_reversed_list(self):
    """Record in ``self.twins`` the ids of duplicated services.

    A service is a twin when an earlier service in iteration order has
    the same ``(host_name, service_description)`` pair. Services missing
    either attribute are ignored. The temporary ``reversed_list`` index
    is deleted before returning.
    """
    self.reversed_list = {}
    self.twins = []
    for s in self:
        if not (hasattr(s, 'service_description') and hasattr(s, 'host_name')):
            continue
        key = (s.host_name, s.service_description)
        if key in self.reversed_list:
            self.twins.append(s.id)
        else:
            self.reversed_list[key] = s.id
    # For service, the reversed_list is not used for
    # search, so we del it
    del self.reversed_list
# TODO : finish search to use reversed
# Search a service id by its name and host_name
def find_srv_id_by_name_and_hostname(self, host_name, name):
    """Return the id of the service ``name`` on ``host_name``, or None.

    This is a linear scan over all services. Each service is matched
    first on its ``host`` attribute, then on its ``host_name`` attribute.
    """
    # key = (host_name, name)
    # if key in self.reversed_list:
    #     return self.reversed_list[key]

    # if not, maybe in the whole list?
    for s in self:
        if not hasattr(s, 'service_description') or s.service_description != name:
            continue
        # Runtime first, available only after linkify
        # NOTE(review): ``s.host`` is compared directly with the
        # host_name argument -- confirm this matches what linkify stores.
        if hasattr(s, 'host') and s.host == host_name:
            return s.id
        # At config part, available before linkify
        if hasattr(s, 'host_name') and s.host_name == host_name:
            return s.id
    return None
# Search a service by its name and host_name
def find_srv_by_name_and_hostname(self, host_name, name):
    """Return the service ``name`` on ``host_name``, or None.

    When ``self.hosts`` is set, the host is looked up first and asked
    for its service. Otherwise the lookup falls back to
    ``find_srv_id_by_name_and_hostname`` and ``self.items``.
    """
    if hasattr(self, 'hosts'):
        h = self.hosts.find_by_name(host_name)
        if h is None:
            return None
        return h.find_service_by_name(name)

    srv_id = self.find_srv_id_by_name_and_hostname(host_name, name)
    if srv_id is None:
        return None
    return self.items[srv_id]
# Make link between elements:
# service -> timeperiods
# service -> contacts
def linkify(self, hosts, commands, timeperiods, contacts,
            resultmodulations, escalations, servicegroups):
    """Run every linkify step for the services, in a fixed order.

    The order is: the three timeperiod properties, hosts, servicegroups,
    the two command properties, contacts, result modulations and
    finally escalations.
    """
    for period_prop in ('notification_period', 'check_period',
                        'maintenance_period'):
        self.linkify_with_timeperiods(timeperiods, period_prop)

    self.linkify_s_by_hst(hosts)
    self.linkify_s_by_sg(servicegroups)

    for command_prop in ('check_command', 'event_handler'):
        self.linkify_one_command_with_commands(commands, command_prop)

    self.linkify_with_contacts(contacts)
    self.linkify_with_resultmodulations(resultmodulations)
    # WARNING: all escalations will not be link here
    # (just the escalation here, not serviceesca or hostesca).
    # This last one will be link in escalations linkify.
    self.linkify_with_escalations(escalations)
# We can link services with hosts so
# We can search in O(hosts) instead
# of O(services) for common cases
def optimize_service_search(self, hosts):
    """Keep a reference to ``hosts`` on this collection.

    ``find_srv_by_name_and_hostname`` uses ``self.hosts`` when present
    to go through the host instead of scanning every service.
    """
    self.hosts = hosts
# We just search for each service the host it names
# and replace the name by the host
# + inform the host we are a service of him
def linkify_s_by_hst(self, hosts):
    """Set ``host`` on every service and register the service on it.

    ``s.host`` becomes the result of ``hosts.find_by_name(s.host_name)``,
    which is None for an unknown host. A service that raises
    AttributeError (e.g. it has no ``host_name``) is skipped.
    """
    for s in self:
        try:
            hst = hosts.find_by_name(s.host_name)
            s.host = hst
            # Let the host know we are his service
            if hst is not None:
                hst.add_service_link(s)
        except AttributeError:
            pass  # Will be caught at the is_correct moment
# We look for servicegroups property in services and
# link them
def linkify_s_by_sg(self, servicegroups):
    """Replace each service's ``servicegroups`` string by a list of groups.

    The comma-separated names are stripped and looked up with
    ``servicegroups.find_by_name``. An unknown name adds a message to
    the service's ``configuration_errors``. Templates are left untouched;
    a service with no (or an empty) ``servicegroups`` gets an empty list.
    """
    for s in self:
        if s.is_tpl():
            continue
        new_servicegroups = []
        if hasattr(s, 'servicegroups') and s.servicegroups != '':
            for sg_name in s.servicegroups.split(','):
                sg_name = sg_name.strip()
                sg = servicegroups.find_by_name(sg_name)
                if sg is not None:
                    new_servicegroups.append(sg)
                else:
                    err = "Error : the servicegroup '%s' of the service '%s' is unknown" % (sg_name, s.get_dbg_name())
                    s.configuration_errors.append(err)
        s.servicegroups = new_servicegroups
# Delete services by ids
def delete_services_by_id(self, ids):
    """Remove from ``self.items`` every service whose id is in ``ids``.

    Raises KeyError if an id is not present in ``self.items``.
    """
    for srv_id in ids:
        del self.items[srv_id]
# It's used to change old Nagios2 names to
# the new ones
def old_properties_names_to_new(self):
    """Call ``old_properties_names_to_new()`` on every service."""
    for s in self:
        s.old_properties_names_to_new()
# Apply implicit inheritance for special properties:
# contact_groups, notification_interval , notification_period
# So service will take info from host if necessary
def apply_implicit_inheritance(self, hosts):
    """Copy selected properties from the host to services lacking them.

    For each listed property, every non-template service that has a
    ``host_name`` but not the property receives the value set on its
    host, when the host exists and has it.
    """
    inherited_props = ('contacts', 'contact_groups', 'notification_interval',
                       'notification_period', 'resultmodulations',
                       'escalations', 'poller_tag', 'check_period',
                       'criticity')
    for prop in inherited_props:
        for s in self:
            if s.is_tpl():
                continue
            if hasattr(s, prop) or not hasattr(s, 'host_name'):
                continue
            h = hosts.find_by_name(s.host_name)
            if h is not None and hasattr(h, prop):
                setattr(s, prop, getattr(h, prop))
# Apply inheritance for all properties
def apply_inheritance(self, hosts):
    """Apply inheritance for every Service property, then for customs.

    ``hosts`` is currently unused: the implicit (host -> service)
    inheritance call is disabled below.
    """
    # For every Service property, apply_partial_inheritance handles the
    # services of this collection.
    for prop in Service.properties:
        self.apply_partial_inheritance(prop)

    # Then implicit inheritance
    # self.apply_implicit_inheritance(hosts)
    for s in self:
        s.get_customs_properties_by_inheritance(self)
# Create dependencies for services (daddy ones)
def apply_dependancies(self):
    """Call ``fill_daddy_dependancy()`` on every service."""
    for s in self:
        s.fill_daddy_dependancy()
# Add in our queue a service create from another. Special case :
# is a template : so hname is a name of template, so need to get all
# hosts that inherit from it.
def copy_create_service_from_another(self, hosts, s, hname):
    """Create the per-host copies of service ``s`` in ``self.items``.

    When ``s`` is not a template, ``hname`` is the final host name.
    When it is a template, ``hname`` is a template name and one copy is
    made for every host returned by
    ``hosts.find_hosts_that_use_template``. Copies of a template get
    ``register = 1``.

    If ``s`` has a non-empty ``duplicate_foreach``, the copies come from
    ``s.duplicate(host)`` instead; a host that cannot be found is
    silently skipped.
    """
    if s.is_tpl():
        # For a template it's more tricky : it's a template name
        # we've got, not a real host_name. So we must get the list of
        # host_name that use this template
        for_hosts_to_create = list(hosts.find_hosts_that_use_template(hname))
    else:
        # Not a template, it's easy : hname is our final host_name
        for_hosts_to_create = [hname]

    is_generator = hasattr(s, 'duplicate_foreach') and s.duplicate_foreach != ''

    # Now really create the services
    for name in for_hosts_to_create:
        if not is_generator:
            new_s = s.copy()
            new_s.host_name = name
            if s.is_tpl():  # if template, the new one is not
                new_s.register = 1
            self.items[new_s.id] = new_s
            continue

        # The generator case, we must create several new services:
        # we must find our host, and get all key:value we need
        h = hosts.find_by_name(name.strip())
        if h is None:
            # TODO : raise an error?
            continue
        for new_s in s.duplicate(h):
            self.items[new_s.id] = new_s
# We create new service if necessary (host groups and co)
def explode(self, hosts, hostgroups, contactgroups,
            servicegroups, servicedependencies):
    """Expand the service definitions into one service per host.

    Steps, in order:
      1. resolve hostgroups into ``host_name`` and contact groups into
         contacts (delegated to the ``explode_*`` helpers);
      2. duplicate every service that names several hosts, is a
         template, or has a non-empty ``duplicate_foreach``; the
         multi-host original is then deleted unless it is a template or
         has configuration errors;
      3. register each non-template service in its servicegroups;
      4. turn each ``service_dependencies`` string (alternating host,
         description) into ``servicedependencies`` entries.
    """
    # The "old" services will be removed. All services with
    # more than one host or a host group will be in it
    srv_to_remove = []

    # items::explode_host_groups_into_hosts
    # take all hosts from our hostgroup_name into our host_name property
    self.explode_host_groups_into_hosts(hosts, hostgroups)

    # items::explode_contact_groups_into_contacts
    # take all contacts from our contact_groups into our contact property
    self.explode_contact_groups_into_contacts(contactgroups)

    # Then for every host create a copy of the service with just the host.
    # Because we are adding services while working, we loop over a
    # snapshot of the ids (a bare keys() view would break on insertion).
    service_to_check = list(self.items.keys())

    for srv_id in service_to_check:
        s = self.items[srv_id]

        # Explode for real service or template with a host_name
        if not hasattr(s, 'host_name'):
            continue

        hnames = strip_and_uniq(s.host_name.split(','))
        is_generator = hasattr(s, 'duplicate_foreach') and s.duplicate_foreach != ''

        # We will duplicate if we have multiple host_name
        # or if we are a template (so a clean service)
        if len(hnames) >= 2 or s.is_tpl() or is_generator:
            duplicate_for_hosts = []  # our host_names
            not_hosts = []  # the !host_name entries, removed afterwards
            for hname in hnames:
                hname = hname.strip()
                # If the name begins with a !, we put it in the not list
                if len(hname) > 0 and hname[0] == '!':
                    not_hosts.append(hname[1:])
                else:  # the standard list
                    duplicate_for_hosts.append(hname)

            # Ok now we clean the duplicate_for_hosts with all hosts
            # of the not list
            for hname in not_hosts:
                if hname in duplicate_for_hosts:
                    duplicate_for_hosts.remove(hname)

            # Now we duplicate the service for all host_names
            for hname in duplicate_for_hosts:
                self.copy_create_service_from_another(hosts, s, hname)

            # Multiple host_name -> the original service
            # must be deleted. But templates are cleaned elsewhere,
            # and a service with an error in its conf is kept
            if not s.is_tpl() and s.configuration_errors == []:
                srv_to_remove.append(srv_id)
        else:
            # Maybe the hnames was full of the same host,
            # so we must reset the name
            for hname in hnames:  # So even if len == 0, we are protected
                s.host_name = hname

    # We clean all service that was for multiple hosts.
    self.delete_services_by_id(srv_to_remove)

    # Servicegroups property need to be fulfilled to get the information
    # And then just register to this service_group
    for s in self:
        if s.is_tpl():
            continue
        sname = s.service_description
        shname = s.host_name
        if hasattr(s, 'servicegroups'):
            for sg in s.servicegroups.split(','):
                servicegroups.add_member(shname + ',' + sname, sg)

    # Now we explode service_dependencies into Servicedependency
    # We just create serviceDep with good values (as STRING!),
    # the link pass will be done after
    for s in self:
        # Templates are useless here
        if s.is_tpl():
            continue
        if not hasattr(s, 'service_dependencies') or s.service_dependencies == '':
            continue
        sdeps = s.service_dependencies.split(',')
        # Even positions are hosts, odd positions are service
        # descriptions; a trailing host without description is ignored
        for dep_host, dep_desc in zip(sdeps[::2], sdeps[1::2]):
            hname = dep_host.strip()
            desc = dep_desc.strip()
            # we can register it (s) (depend on) -> (hname, desc)
            # If we do not have enough data for s, it's no use
            if hasattr(s, 'service_description') and hasattr(s, 'host_name'):
                servicedependencies.add_service_dependency(s.host_name, s.service_description, hname, desc)
# Will create all business tree for the
# services
def create_business_rules(self, hosts, services):
    """Call ``create_business_rules(hosts, services)`` on every service."""
    for s in self:
        s.create_business_rules(hosts, services)
# Will link all business service/host with theirs
# dep for problem/impact link
def create_business_rules_dependencies(self):
    """Call ``create_business_rules_dependencies()`` on every service."""
    for s in self:
        s.create_business_rules_dependencies()