Cleanup config.nodes_of
[check_mk.git] / checks / esx_vsphere_hostsystem
blobb269f506f1ed04550cc1ea1bfd31ac7f4ed45e76
1 #!/usr/bin/python
2 # -*- encoding: utf-8; py-indent-offset: 4 -*-
3 # +------------------------------------------------------------------+
4 # | ____ _ _ __ __ _ __ |
5 # | / ___| |__ ___ ___| | __ | \/ | |/ / |
6 # | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
7 # | | |___| | | | __/ (__| < | | | | . \ |
8 # | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
9 # | |
10 # | Copyright Mathias Kettner 2014 mk@mathias-kettner.de |
11 # +------------------------------------------------------------------+
13 # This file is part of Check_MK.
14 # The official homepage is at http://mathias-kettner.de/check_mk.
16 # check_mk is free software; you can redistribute it and/or modify it
17 # under the terms of the GNU General Public License as published by
18 # the Free Software Foundation in version 2. check_mk is distributed
19 # in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
20 # out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
21 # PARTICULAR PURPOSE. See the GNU General Public License for more de-
22 # tails. You should have received a copy of the GNU General Public
23 # License along with GNU Make; see the file COPYING. If not, write
24 # to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
25 # Boston, MA 02110-1301 USA.
27 # TODO: The extra_sections part is only transitional code to avoid
28 # duplicate CPU services.
29 # Previously, the duplicate services were handled in the agent_vsphere
30 # directly. Starting with 1.5.0 this is problematic since multiple
31 # datasources can be defined in Check_MK natively. Therefore we shift
32 # the detection of the duplicate services directly to this check.
33 # When it is possible to handle duplicate services for different
34 # datasources correctly at the cmk_base level this code can be removed.
35 # NOTE: By design the extra_sections are passed to EVERY subcheck and
36 # therefore have to be removed in every subcheck!
37 # Change-Id: I3b6a56efcff4c75bbd05a051242e18deaa499d9e
# Registration of the main section. The only key visible here requests the
# additional "winperf_processor" section, which is then handed to every
# subcheck as the second element of ``info``.
check_info['esx_vsphere_hostsystem'] = {
    'extra_sections': ["winperf_processor"],
}
def esx_vsphere_hostsystem_convert(info):
    """Turn the section lines into a dict keyed by the first column.

    Each line is a list; its first element becomes the key and the remaining
    elements (possibly none) become the value list. If a key occurs more
    than once, the last occurrence wins. Empty lines are skipped instead of
    raising an IndexError.
    """
    data = {}
    for line in info:
        if not line:
            continue
        data[line[0]] = line[1:]
    return data
51 # .--CPU-----------------------------------------------------------------.
52 # | ____ ____ _ _ |
53 # | / ___| _ \| | | | |
54 # | | | | |_) | | | | |
55 # | | |___| __/| |_| | |
56 # | \____|_| \___/ |
57 # | |
58 # +----------------------------------------------------------------------+
# Default parameters of the CPU utilization check; referenced by name through
# "default_levels_variable" in the cpu_usage registration.
esx_host_cpu_default_levels = {}
62 # hardware.cpuInfo.numCpuCores 12
63 # hardware.cpuInfo.numCpuPackages 2
64 # hardware.cpuInfo.numCpuThreads 24
65 # hardware.cpuInfo.hz 2933436846 --> In Hz per CPU Core
66 # summary.quickStats.overallCpuUsage 7539 --> In MHz
def inventory_esx_vsphere_hostsystem_cpu(info):
    """Discover the CPU utilization service of a single ESX host.

    ``info`` is the pair (hostsystem section, winperf_processor section).
    One service without an item is returned only if no winperf_processor
    data is present (to avoid duplicate CPU services) and the usage, clock
    rate and core count properties are all available. Otherwise nothing is
    returned.
    """
    hostsystem_info, winperf_info = info
    data = esx_vsphere_hostsystem_convert(hostsystem_info)
    required_keys = (
        'summary.quickStats.overallCpuUsage',
        'hardware.cpuInfo.hz',
        'hardware.cpuInfo.numCpuCores',
    )
    if not winperf_info and all(key in data for key in required_keys):
        return [(None, {})]
def check_esx_vsphere_hostsystem_cpu(item, params, info):
    """Check the CPU utilization of a single ESX host.

    ``info`` is the pair (hostsystem section, winperf_processor section).
    Nothing is returned when winperf_processor data is present (to avoid
    duplicate CPU services) or when the overall CPU usage is missing.

    Returns a tuple (state, infotext, perfdata). State, base text and
    perfdata are the first result produced by check_cpu_util(); the text is
    extended with the used/total clock rate and the CPU topology.

    Raises KeyError if one of the hardware.cpuInfo.* properties is missing.
    """
    hostsystem_info, winperf_info = info
    data = esx_vsphere_hostsystem_convert(hostsystem_info)

    if winperf_info or "summary.quickStats.overallCpuUsage" not in data:
        return

    num_sockets = int(data['hardware.cpuInfo.numCpuPackages'][0])
    num_cores = int(data['hardware.cpuInfo.numCpuCores'][0])
    num_threads = int(data['hardware.cpuInfo.numCpuThreads'][0])
    # overallCpuUsage is reported in MHz, cpuInfo.hz in Hz per core.
    used_mhz = float(data['summary.quickStats.overallCpuUsage'][0])
    mhz_per_core = float(data['hardware.cpuInfo.hz'][0]) / 1000.0 / 1000.0
    total_mhz = mhz_per_core * num_cores

    usage = used_mhz / total_mhz * 100

    # next() instead of the Python-2-only generator method .next()
    state, infotext, perfdata = next(check_cpu_util(usage, params))

    infotext += ", %.2fGHz/%.2fGHz" % (used_mhz / 1000.0, total_mhz / 1000.0)
    # Floor division keeps the cores/socket value an integer on every
    # Python version.
    infotext += ", %d sockets, %d cores/socket, %d threads" % (num_sockets,
                                                               num_cores // num_sockets,
                                                               num_threads)

    return (state, infotext, perfdata)
# Registration of the single-host CPU utilization subcheck.
check_info['esx_vsphere_hostsystem.cpu_usage'] = {
    "inventory_function": inventory_esx_vsphere_hostsystem_cpu,
    "check_function": check_esx_vsphere_hostsystem_cpu,
    "service_description": "CPU utilization",
    "group": "cpu_utilization_os",
    "has_perfdata": True,
    "default_levels_variable": "esx_host_cpu_default_levels",
    "includes": ["cpu_util.include"],
}
120 # .--Mem-Cluster---------------------------------------------------------.
121 # | __ __ ____ _ _ |
122 # | | \/ | ___ _ __ ___ / ___| |_ _ ___| |_ ___ _ __ |
123 # | | |\/| |/ _ \ '_ ` _ \ _____| | | | | | / __| __/ _ \ '__| |
124 # | | | | | __/ | | | | |_____| |___| | |_| \__ \ || __/ | |
125 # | |_| |_|\___|_| |_| |_| \____|_|\__,_|___/\__\___|_| |
126 # | |
127 # +----------------------------------------------------------------------+
def check_esx_vsphere_hostsystem_mem_cluster(item, params, info):
    """Check the summed memory usage of all nodes found in the section.

    ``info`` is the pair (hostsystem section, extra section); the extra
    section is ignored. ``params`` is an iterable of (node_count, (warn,
    crit)) entries with percentage levels. The entry with the highest
    node_count that does not exceed the number of nodes found is applied; if
    none matches, no levels are applied.

    Yields one result (state, text, perfdata).

    Raises KeyError if one of the evaluated properties never occurs, and
    ZeroDivisionError if the summed memory size is zero.
    """
    info, _ = info
    wanted_keys = ("summary.quickStats.overallMemoryUsage", "hardware.memorySize", "name")

    # Unlike the single-host conversion, all values of a key are collected
    # here, one per node.
    data = {}
    for line in info:
        if line[0] in wanted_keys:
            data.setdefault(line[0], []).append(line[1])
    sorted_params = sorted(params, reverse=True)

    nodes_count = len(data['name'])
    # Usage is scaled by 1024 * 1024 (presumably MiB to bytes); the size is
    # used as reported.
    total_memory_usage = sum(
        savefloat(x) * 1024 * 1024 for x in data['summary.quickStats.overallMemoryUsage'])
    total_memory_size = sum(savefloat(x) for x in data['hardware.memorySize'])

    level = total_memory_usage / total_memory_size * 100
    label = ""
    state = 0
    warn_perf, crit_perf = None, None
    for count, levels in sorted_params:
        if nodes_count >= count:
            warn, crit = levels
            # Levels are percentages; perfdata carries them as absolute values.
            warn_perf = warn * total_memory_size / 100
            crit_perf = crit * total_memory_size / 100

            if level > crit:
                state = 2
            elif level > warn:
                state = 1
            if state:
                label = " (Levels at %d%%/%d%%)" % (warn, crit)
            break

    perf = [("usage", total_memory_usage, warn_perf, crit_perf, 0, total_memory_size),
            ("mem_total", total_memory_size)]
    yield state, "%d%%%s used - %s/%s" % \
        (level, label, get_bytes_human_readable(total_memory_usage),
         get_bytes_human_readable(total_memory_size)), perf
# Registration of the cluster memory subcheck. No inventory function is
# registered for it here.
check_info['esx_vsphere_hostsystem.mem_usage_cluster'] = {
    "check_function": check_esx_vsphere_hostsystem_mem_cluster,
    "service_description": "Memory used",
    "group": "mem_cluster",
    "has_perfdata": True,
}
176 # .--CPU-Cluster---------------------------------------------------------.
177 # | ____ ____ _ _ ____ _ _ |
178 # | / ___| _ \| | | | / ___| |_ _ ___| |_ ___ _ __ |
179 # | | | | |_) | | | |_____| | | | | | / __| __/ _ \ '__| |
180 # | | |___| __/| |_| |_____| |___| | |_| \__ \ || __/ | |
181 # | \____|_| \___/ \____|_|\__,_|___/\__\___|_| |
182 # | |
183 # +----------------------------------------------------------------------+
def check_esx_vsphere_hostsystem_cpu_util_cluster(item, params, info):
    """Check the summed CPU utilization of all nodes found in the section.

    ``info`` is the pair (hostsystem section, extra section); the extra
    section is ignored. ``params`` is an iterable of (node_count, levels)
    entries. The levels of the entry with the highest node_count that does
    not exceed the number of complete nodes are passed to check_cpu_util();
    if none matches, None is passed.

    Yields three informational results (node count, used/total clock rate,
    thread count) followed by the first result of check_cpu_util().

    Hz is converted to MHz, and MHz to GHz, with a factor of 1000, matching
    the single-host CPU check. The former 1024-based factors understated the
    total clock rate and therefore overstated the utilization.

    Raises ZeroDivisionError if no complete node was found.
    """
    info, _ = info
    node_keys = (
        "hardware.cpuInfo.numCpuPackages",
        "hardware.cpuInfo.numCpuCores",
        "hardware.cpuInfo.numCpuThreads",
        "summary.quickStats.overallCpuUsage",
        "hardware.cpuInfo.hz",
    )

    def get_node_usage(node):
        # Returns (used MHz, total MHz, thread count) of one node.
        num_cores = int(node['hardware.cpuInfo.numCpuCores'])
        num_threads = int(node['hardware.cpuInfo.numCpuThreads'])
        used_mhz = float(node['summary.quickStats.overallCpuUsage'])
        mhz_per_core = float(node['hardware.cpuInfo.hz']) / 1000.0 / 1000.0
        total_mhz = mhz_per_core * num_cores
        return used_mhz, total_mhz, num_threads

    overall_used = []
    overall_total = []
    overall_threads = []
    current_node = {}
    for line in info:
        if line[0] in node_keys:
            current_node[line[0]] = line[1]
            # A node is complete as soon as every key has been seen once.
            if len(current_node) == len(node_keys):
                node_used, node_total, node_threads = get_node_usage(current_node)
                overall_used.append(node_used)
                overall_total.append(node_total)
                overall_threads.append(node_threads)
                current_node = {}

    sum_used = sum(overall_used)
    sum_total = sum(overall_total)
    sum_threads = sum(overall_threads)
    usage = sum_used / sum_total * 100
    node_count = len(overall_used)

    # Pick the levels of the largest matching node count, None otherwise.
    levels = None
    for count, node_levels in sorted(params, reverse=True):
        if node_count >= count:
            levels = node_levels
            break
    # next() instead of the Python-2-only generator method .next()
    state, infotext, perfdata = next(check_cpu_util(usage, levels))

    yield 0, "%d Nodes" % node_count
    yield 0, "%.2fGHz/%.2fGHz" % (sum_used / 1000.0, sum_total / 1000.0)
    yield 0, "%d threads" % sum_threads

    yield state, infotext, perfdata
# Registration of the cluster CPU utilization subcheck. No inventory function
# is registered for it here.
check_info['esx_vsphere_hostsystem.cpu_util_cluster'] = {
    "check_function": check_esx_vsphere_hostsystem_cpu_util_cluster,
    "service_description": "CPU utilization",
    "group": "cpu_utilization_cluster",
    "has_perfdata": True,
    "includes": ["cpu_util.include"],
}
247 # .--Memory--------------------------------------------------------------.
248 # | __ __ |
249 # | | \/ | ___ _ __ ___ ___ _ __ _ _ |
250 # | | |\/| |/ _ \ '_ ` _ \ / _ \| '__| | | | |
251 # | | | | | __/ | | | | | (_) | | | |_| | |
252 # | |_| |_|\___|_| |_| |_|\___/|_| \__, | |
253 # | |___/ |
254 # +----------------------------------------------------------------------+
# Default (warn, crit) levels of the memory check in percent; the memory
# inventory function hands out this variable by name.
esx_host_mem_default_levels = (80.0, 90.0)
def inventory_esx_vsphere_hostsystem_mem(info):
    """Discover the memory service of a single ESX host.

    ``info`` is the pair (hostsystem section, extra section); the extra
    section is ignored. One service without an item is returned if both the
    memory usage and the memory size are available, with the name of the
    default levels variable as parameters. Otherwise nothing is returned.
    """
    info, _ = info
    data = esx_vsphere_hostsystem_convert(info)
    if 'summary.quickStats.overallMemoryUsage' in data and 'hardware.memorySize' in data:
        return [(None, 'esx_host_mem_default_levels')]
def check_esx_vsphere_hostsystem_mem(item, params, info):
    """Check the memory usage of a single ESX host.

    ``info`` is the pair (hostsystem section, extra section); the extra
    section is ignored. ``params`` is a (warn, crit) pair of percentages.

    Returns a tuple (state, message, perfdata), or nothing if the memory
    usage or the memory size is missing. The size is now covered by the
    guard as well, because it is read unconditionally below.

    Raises ZeroDivisionError if the reported memory size is zero.
    """
    info, _ = info
    data = esx_vsphere_hostsystem_convert(info)

    if ("summary.quickStats.overallMemoryUsage" not in data or
            "hardware.memorySize" not in data):
        return

    # Usage is scaled by 1024 * 1024 (presumably MiB to bytes); the size is
    # used as reported.
    memory_usage = savefloat(data['summary.quickStats.overallMemoryUsage'][0]) * 1024 * 1024
    memory_size = savefloat(data['hardware.memorySize'][0])
    level = memory_usage / memory_size * 100

    warn, crit = params
    state = 0
    label = ''
    if level > crit:
        state = 2
    elif level > warn:
        state = 1
    if state:
        label = " (Levels at %d%%/%d%%)" % (warn, crit)

    message = "%d%%%s used - %s/%s" % \
        (level, label, get_bytes_human_readable(memory_usage), get_bytes_human_readable(memory_size))
    # Levels are percentages; perfdata carries them as absolute values.
    perf = [
        ("usage", memory_usage, warn * memory_size / 100, crit * memory_size / 100, 0, memory_size),
        ("mem_total", memory_size),
    ]
    return (state, message, perf)
# Registration of the single-host memory subcheck.
check_info['esx_vsphere_hostsystem.mem_usage'] = {
    "inventory_function": inventory_esx_vsphere_hostsystem_mem,
    "check_function": check_esx_vsphere_hostsystem_mem,
    "service_description": "Memory used",
    "group": "esx_host_memory",
    "has_perfdata": True,
}
305 # .--State---------------------------------------------------------------.
306 # | ____ _ _ |
307 # | / ___|| |_ __ _| |_ ___ |
308 # | \___ \| __/ _` | __/ _ \ |
309 # | ___) | || (_| | || __/ |
310 # | |____/ \__\__,_|\__\___| |
311 # | |
312 # +----------------------------------------------------------------------+
def inventory_esx_vsphere_hostsystem_state(info):
    """Discover the overall state service of a single ESX host.

    ``info`` is the pair (hostsystem section, extra section); the extra
    section is ignored. One service without item and parameters is returned
    if 'runtime.inMaintenanceMode' is present; otherwise nothing is returned.
    """
    info, _ = info
    data = esx_vsphere_hostsystem_convert(info)
    # NOTE(review): the check function reads 'overallStatus' and
    # 'runtime.powerState', not 'runtime.inMaintenanceMode'. Confirm that
    # this key is the intended discovery condition.
    if 'runtime.inMaintenanceMode' in data:
        return [(None, None)]
def check_esx_vsphere_hostsystem_state(_no_item, _no_params, info):
    """Report the entity state and the power state of a single ESX host.

    ``info`` is the pair (hostsystem section, extra section); the extra
    section is ignored. Nothing is yielded if 'overallStatus' is missing.

    Yields two results:
      * entity state: "yellow" -> 1, "red"/"gray" -> 2, anything else -> 0
      * power state: "poweredOff"/"unknown" -> 2, "standBy" -> 1,
        anything else -> 0

    Raises KeyError (after the first result) if 'runtime.powerState' is
    missing.
    """
    info, _ = info
    data = esx_vsphere_hostsystem_convert(info)
    if "overallStatus" not in data:
        return

    overall_status = str(data['overallStatus'][0])
    state = 0
    if overall_status == "yellow":
        state = 1
    elif overall_status in ["red", "gray"]:
        state = 2
    yield state, "Entity state: " + overall_status

    power_state = str(data['runtime.powerState'][0])
    state = 0
    if power_state in ['poweredOff', 'unknown']:
        state = 2
    elif power_state == 'standBy':
        state = 1
    yield state, "Power state: " + power_state
# Registration of the overall state subcheck.
check_info['esx_vsphere_hostsystem.state'] = {
    "inventory_function": inventory_esx_vsphere_hostsystem_state,
    "check_function": check_esx_vsphere_hostsystem_state,
    "service_description": "Overall state",
}
352 # .--Maintenance---------------------------------------------------------.
353 # | __ __ _ _ |
354 # | | \/ | __ _(_)_ __ | |_ ___ _ __ __ _ _ __ ___ ___ |
355 # | | |\/| |/ _` | | '_ \| __/ _ \ '_ \ / _` | '_ \ / __/ _ \ |
356 # | | | | | (_| | | | | | || __/ | | | (_| | | | | (_| __/ |
357 # | |_| |_|\__,_|_|_| |_|\__\___|_| |_|\__,_|_| |_|\___\___| |
358 # | |
359 # +----------------------------------------------------------------------+
360 # | |
361 # '----------------------------------------------------------------------'
def inventory_esx_vsphere_hostsystem_maintenance(info):
    """Discover the maintenance mode service of a single ESX host.

    ``info`` is the pair (hostsystem section, extra section); the extra
    section is ignored. If 'runtime.inMaintenanceMode' is present, one
    service without an item is returned. The lower-cased value found at
    discovery time is stored as the 'target_state' parameter. Otherwise
    nothing is returned.
    """
    info, _ = info
    data = esx_vsphere_hostsystem_convert(info)
    if 'runtime.inMaintenanceMode' in data:
        current_state = str(data['runtime.inMaintenanceMode'][0]).lower()
        return [(None, {'target_state': current_state})]
def check_esx_vsphere_hostsystem_maintenance(_no_item, params, info):
    """Compare the current maintenance mode with the expected one.

    ``info`` is the pair (hostsystem section, extra section); the extra
    section is ignored. ``params`` must contain 'target_state', which is
    compared against the lower-cased current value.

    Returns (state, text): state 2 if the current mode differs from the
    target state, 0 otherwise. Nothing is returned if
    'runtime.inMaintenanceMode' is missing.

    Raises KeyError if ``params`` has no 'target_state'.
    """
    info, _ = info
    data = esx_vsphere_hostsystem_convert(info)
    target_state = params['target_state']

    if "runtime.inMaintenanceMode" not in data:
        return

    current_state = str(data['runtime.inMaintenanceMode'][0]).lower()
    state = 2 if target_state != current_state else 0
    if current_state == "true":
        return state, "System running is in Maintenance mode"
    return state, "System not in Maintenance mode"
# Registration of the maintenance mode subcheck. The group name is kept
# byte-for-byte because it is an identifier that is looked up at runtime.
check_info['esx_vsphere_hostsystem.maintenance'] = {
    "inventory_function": inventory_esx_vsphere_hostsystem_maintenance,
    "check_function": check_esx_vsphere_hostsystem_maintenance,
    "service_description": "Maintenance Mode",
    "group": "esx_hostystem_maintenance",
}
397 # .--Multipath-----------------------------------------------------------.
398 # | __ __ _ _ _ _ _ |
399 # | | \/ |_ _| | |_(_)_ __ __ _| |_| |__ |
400 # | | |\/| | | | | | __| | '_ \ / _` | __| '_ \ |
401 # | | | | | |_| | | |_| | |_) | (_| | |_| | | | |
402 # | |_| |_|\__,_|_|\__|_| .__/ \__,_|\__|_| |_| |
403 # | |_| |
404 # +----------------------------------------------------------------------+
406 # 5.1
407 # fc.20000024ff2e1b4c:21000024ff2e1b4c-fc.500a098088866d7e:500a098188866d7e-naa.60a9800044314f68553f436779684544 active
408 # unknown.vmhba0-unknown.2:0-naa.6b8ca3a0facdc9001a2a27f8197dd718 active
409 # 5.5
410 # fc.20000024ff3708ec:21000024ff3708ec-fc.500a098088866d7e:500a098188866d7e-naa.60a9800044314f68553f436779684544 active
411 # fc.500143802425a24d:500143802425a24c-fc.5001438024483280:5001438024483288-naa.5001438024483280 active
412 # >= version 6.0
413 # vmhba32:C0:T0:L0 active
def esx_vsphere_multipath_convert(info):
    """Group the multipath information by LUN id.

    The value list of 'config.storageDevice.multipathInfo' is read as a flat
    sequence of (lun_id, path, state) triples; a trailing incomplete triple
    is dropped by zip().

    Returns a dict mapping each LUN id to a list of (state, path) tuples, or
    an empty dict if the property is missing or empty.
    """
    data = esx_vsphere_hostsystem_convert(info)

    raw_path_info = data.get('config.storageDevice.multipathInfo')
    if not raw_path_info:
        return {}

    # Every third element, starting at offsets 0, 1 and 2, forms one column
    # of the triples.
    path_info = zip(raw_path_info[::3], raw_path_info[1::3], raw_path_info[2::3])

    paths = {}
    for lun_id, path, state in path_info:
        paths.setdefault(lun_id, []).append((state, path))

    return paths
def inventory_esx_vsphere_hostsystem_multipath(info):
    """Discover one multipath service per LUN id, without parameters.

    ``info`` is the pair (hostsystem section, extra section); the extra
    section is ignored.
    """
    info, _ = info
    return [(lun_id, None) for lun_id in esx_vsphere_multipath_convert(info)]
def check_esx_vsphere_hostsystem_multipath(item, params, info):
    """Check the path states of one multipath LUN.

    ``info`` is the pair (hostsystem section, extra section); the extra
    section is ignored. ``item`` is the LUN id. ``params`` is either empty
    or a list (path states alone decide the result), or a dict mapping a
    state name to count limits: (warn_max, crit_max) or
    (crit_min, warn_min, warn_max, crit_max).

    Returns (state, message). The message lists the number of paths per
    state followed by the individual paths. If the LUN does not occur in the
    section, (3, "Path not found in agent output") is returned.
    """
    info, _ = info
    state_infos = {
        # alert_state, count, info
        "active": [0, 0, ""],
        "dead": [2, 0, ""],
        "disabled": [1, 0, ""],
        "standby": [0, 0, ""],
        "unknown": [2, 0, ""],
    }

    paths = esx_vsphere_multipath_convert(info)
    if item not in paths:
        return 3, "Path not found in agent output"

    # Collect states
    state = 0
    path_names = []
    for path_state, path_name in paths[item]:
        state_item = state_infos.get(path_state)
        path_info = path_name
        if state_item:
            state_item[1] += 1
            state = max(state_item[0], state)
            # state_markers is provided by the surrounding check environment
            # and is indexed by the alert state.
            path_info += state_markers[state_item[0]]
        path_names.append(path_info)

    # Check warn, critical
    if not params or isinstance(params, list):
        # NOTE(review): the alert state of "standby" is 0 above, so this
        # max() cannot raise the state. Kept as in the original.
        if state_infos["standby"][1] > 0 and \
           state_infos["standby"][1] != state_infos["active"][1]:
            state = max(state_infos["standby"][0], state)
    else:
        # With explicit limits only the path counts decide the state; the
        # state derived from the individual paths is discarded.
        state = 0
        for state_name, state_values in state_infos.items():
            limits = params.get(state_name)
            if not limits:
                continue

            if len(limits) == 2:
                warn_max, crit_max = limits
                crit_min, warn_min = 0, 0
            else:
                crit_min, warn_min, warn_max, crit_max = limits

            count = state_values[1]
            if count < crit_min:
                state = max(state, 2)
                state_values[2] = "(!!)(less than %d)" % crit_min
            elif count > crit_max:
                state = max(state, 2)
                state_values[2] = "(!!)(more than %d)" % crit_max
            elif count < warn_min:
                state = max(state, 1)
                state_values[2] = "(!)(less than %d)" % warn_min
            elif count > warn_max:
                state = max(state, 1)
                state_values[2] = "(!)(more than %d)" % warn_max

    # Output message
    element_text = [
        "%d %s%s" % (state_infos[element][1], element, state_infos[element][2])
        for element in ("active", "dead", "disabled", "standby", "unknown")
    ]
    message = ", ".join(element_text)
    message += "\nIncluded Paths:\n" + "\n".join(path_names)

    return state, message
# Registration of the multipath subcheck; the LUN id is inserted into the
# service description.
check_info['esx_vsphere_hostsystem.multipath'] = {
    "inventory_function": inventory_esx_vsphere_hostsystem_multipath,
    "check_function": check_esx_vsphere_hostsystem_multipath,
    "service_description": "Multipath %s",
    "group": "multipath_count",
}