Cleanup config.nodes_of
[check_mk.git] / checks / esx_vsphere_counters
blobcb7a852f5c11167ca70bd69a88cd607ad4789ee5
1 #!/usr/bin/python
2 # -*- encoding: utf-8; py-indent-offset: 4 -*-
3 # +------------------------------------------------------------------+
4 # | ____ _ _ __ __ _ __ |
5 # | / ___| |__ ___ ___| | __ | \/ | |/ / |
6 # | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
7 # | | |___| | | | __/ (__| < | | | | . \ |
8 # | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
9 # | |
10 # | Copyright Mathias Kettner 2014 mk@mathias-kettner.de |
11 # +------------------------------------------------------------------+
13 # This file is part of Check_MK.
14 # The official homepage is at http://mathias-kettner.de/check_mk.
16 # check_mk is free software; you can redistribute it and/or modify it
17 # under the terms of the GNU General Public License as published by
18 # the Free Software Foundation in version 2. check_mk is distributed
19 # in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
20 # out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
21 # PARTICULAR PURPOSE. See the GNU General Public License for more de-
22 # tails. You should have received a copy of the GNU General Public
23 # License along with GNU Make; see the file COPYING. If not, write
24 # to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
25 # Boston, MA 02110-1301 USA.
27 # Example output:
28 # <<<esx_vsphere_counters:sep(124)>>>
29 # net.broadcastRx|vmnic0|11|number
30 # net.broadcastRx||11|number
31 # net.broadcastTx|vmnic0|0|number
32 # net.broadcastTx||0|number
33 # net.bytesRx|vmnic0|3820|kiloBytesPerSecond
34 # net.bytesRx|vmnic1|0|kiloBytesPerSecond
35 # net.bytesRx|vmnic2|0|kiloBytesPerSecond
36 # net.bytesRx|vmnic3|0|kiloBytesPerSecond
37 # net.bytesRx||3820|kiloBytesPerSecond
38 # net.bytesTx|vmnic0|97|kiloBytesPerSecond
39 # net.bytesTx|vmnic1|0|kiloBytesPerSecond
40 # net.bytesTx|vmnic2|0|kiloBytesPerSecond
41 # net.bytesTx|vmnic3|0|kiloBytesPerSecond
42 # net.bytesTx||97|kiloBytesPerSecond
43 # net.droppedRx|vmnic0|0|number
44 # net.droppedRx|vmnic1|0|number
45 # net.droppedRx|vmnic2|0|number
46 # net.droppedRx|vmnic3|0|number
47 # net.droppedRx||0|number
48 # net.droppedTx|vmnic0|0|number
49 # net.droppedTx|vmnic1|0|number
50 # ...
51 # sys.uptime||630664|second
53 # .--Disk IO-------------------------------------------------------------.
54 # | ____ _ _ ___ ___ |
55 # | | _ \(_)___| | __ |_ _/ _ \ |
56 # | | | | | / __| |/ / | | | | | |
57 # | | |_| | \__ \ < | | |_| | |
58 # | |____/|_|___/_|\_\ |___\___/ |
59 # | |
60 # '----------------------------------------------------------------------'
61 # Example output:
62 # disk.deviceLatency|naa.600605b002db9f7018d0a40c2a1444b0|0|millisecond
63 # disk.numberRead|naa.600605b002db9f7018d0a40c2a1444b0|8|number
64 # disk.numberWrite|naa.600605b002db9f7018d0a40c2a1444b0|47|number
65 # disk.read|naa.600605b002db9f7018d0a40c2a1444b0|12|kiloBytesPerSecond
66 # disk.read||12|kiloBytesPerSecond
67 # disk.write|naa.600605b002db9f7018d0a40c2a1444b0|51|kiloBytesPerSecond
68 # disk.write||51|kiloBytesPerSecond
def inventory_esx_vsphere_counters_diskio(parsed):
    """Discover the single "SUMMARY" disk IO service.

    The service is only created when the ``disk.read`` counter contains the
    aggregated entry, i.e. the one reported with an empty instance name.
    In every other case nothing is discovered and ``None`` is returned.
    """
    read_instances = parsed.get("disk.read", {})
    if "" in read_instances:
        return [("SUMMARY", {})]
    return None
def check_esx_vsphere_counters_diskio(item, params, parsed):
    """Check the summarized disk IO of an ESX host.

    One "SUMMARY" record is assembled from the parsed counters and handed
    over to check_diskstat_dict():

    * read/write throughput: average of the samples of the aggregated
      ``disk.read`` / ``disk.write`` entry (empty instance name) times 1024.
    * read/write IOs: sum of the averaged ``disk.numberRead`` /
      ``disk.numberWrite`` samples of all instances.
    * latency: highest ``disk.deviceLatency`` sample of any instance,
      divided by 1000.

    Values for which no counter is available stay at 0.

    Raises MKCounterWrapped when the ``disk.read`` counter is missing.
    """
    if "disk.read" not in parsed:
        raise MKCounterWrapped("Counter data is missing")

    values = {
        'read_throughput': 0,
        'write_throughput': 0,
        'read_ios': 0,
        'write_ios': 0,
        'latency': 0,
    }

    for what in ("read", "write"):
        throughput = parsed.get("disk.%s" % what, {})
        if "" in throughput:
            values['%s_throughput' % what] = int(
                esx_vsphere_get_average(throughput[""][0][0]) * 1024)

        # The operation counters may be absent even though throughput data
        # is available. A missing counter must count as "no IOs" instead of
        # aborting the whole check with a KeyError.
        operations = parsed.get("disk.number%s" % what.title(), {})
        for info in operations.values():
            values["%s_ios" % what] += int(esx_vsphere_get_average(info[0][0]))

    if "disk.deviceLatency" in parsed:
        highest_latency = 0
        for entry in parsed["disk.deviceLatency"].values():
            highest_latency = max(highest_latency, max(map(int, entry[0][0])))
        values["latency"] = highest_latency / 1000.0

    return check_diskstat_dict(item, params, {"SUMMARY": values})
# Registration of the disk IO sub check (one "SUMMARY" service per host).
check_info['esx_vsphere_counters.diskio'] = {
    'service_description': 'Disk IO %s',
    'inventory_function': inventory_esx_vsphere_counters_diskio,
    'check_function': check_esx_vsphere_counters_diskio,
    'has_perfdata': True,
    'group': 'diskstat',
    # check_diskstat_dict() used by the check function is not defined in
    # this file; presumably it is provided by this include file.
    'includes': ["diskstat.include"],
}
114 # .--Datastore IO--------------------------------------------------------.
115 # | ____ _ _ ___ ___ |
116 # | | _ \ __ _| |_ __ _ ___| |_ ___ _ __ ___ |_ _/ _ \ |
117 # | | | | |/ _` | __/ _` / __| __/ _ \| '__/ _ \ | | | | | |
118 # | | |_| | (_| | || (_| \__ \ || (_) | | | __/ | | |_| | |
119 # | |____/ \__,_|\__\__,_|___/\__\___/|_| \___| |___\___/ |
120 # | |
121 # +----------------------------------------------------------------------+
123 # Example output:
124 # datastore.read|4c4ece34-3d60f64f-1584-0022194fe902|0#1#2|kiloBytesPerSecond
125 # datastore.read|4c4ece5b-f1461510-2932-0022194fe902|0#4#5|kiloBytesPerSecond
126 # datastore.numberReadAveraged|511e4e86-1c009d48-19d2-bc305bf54b07|0#0#0|number
127 # datastore.numberWriteAveraged|4c4ece34-3d60f64f-1584-0022194fe902|0#0#1|number
128 # datastore.totalReadLatency|511e4e86-1c009d48-19d2-bc305bf54b07|0#5#5|millisecond
129 # datastore.totalWriteLatency|4c4ece34-3d60f64f-1584-0022194fe902|0#2#7|millisecond
def parse_esx_vsphere_counters(info):
    """Convert the agent section into a nested dictionary.

    Each line of ``info`` holds four fields: counter name, instance name,
    the "#" separated samples and the unit. The result has the form

        {counter: {instance: [(samples, unit), ...]}}

    where ``samples`` is the list of sample strings of one line. Lines that
    share counter and instance are appended in the order they arrive.
    """
    # The data reported by the ESX system is split into multiple real time
    # samples with a fixed duration of 20 seconds. A check interval of one
    # minute reports 3 samples. The esx_vsphere_counters checks need to
    # figure out by themselves how to handle this data.
    parsed = {}
    for counter, instance, multivalues, unit in info:
        samples = multivalues.split("#")
        by_instance = parsed.setdefault(counter, {})
        by_instance.setdefault(instance, []).append((samples, unit))
    return parsed
def esx_vsphere_get_average(values):
    """Return the arithmetic mean of the given samples.

    ``values`` is a sequence of integer samples, usually given as strings.
    An empty sequence yields 0, anything else a float.

    The samples are converted into a real list before averaging. The result
    of map() must not be passed to len(), because map() only returns a list
    under Python 2.
    """
    if not values:
        return 0
    int_values = [int(value) for value in values]
    return sum(int_values) / float(len(int_values))
152 # esx datastores are either shown by human readable name (if available) or by the uid
def esx_vsphere_counters_get_item_mapping(parsed):
    """Map each datastore instance (uid) to the item name of its service.

    All instances that show up in one of the datastore IO counters are
    mapped to themselves first. If ``datastore.name`` reports a non-empty
    name for such an instance, the most recent name is used instead, with
    blanks replaced by underscores.

    Returns a dictionary {instance: item name}.
    """
    map_instance_to_item = {}
    for counter in ("read", "write", "datastoreReadIops", "datastoreWriteIops",
                    "sizeNormalizedDatastoreLatency"):
        for instance in parsed.get("datastore." + counter, {}):
            map_instance_to_item[instance] = instance

    for instance, values in parsed.get("datastore.name", {}).items():
        if instance not in map_instance_to_item:
            continue
        # values[0][0] is the list of samples, so it has to be the last
        # sample that is tested for emptiness. Comparing the list itself
        # with "" is always unequal and would let an empty name replace
        # the uid.
        samples = values[0][0]
        name = samples[-1] if samples else ""
        if name != "":
            map_instance_to_item[instance] = name.replace(" ", "_")
    return map_instance_to_item
def inventory_esx_vsphere_counters_datastoreio(parsed):
    """Discover one datastore IO service per known datastore item.

    The item names come from esx_vsphere_counters_get_item_mapping(). Each
    of them is wrapped into a two element row ``[None, item]`` and the rows
    are passed to inventory_diskstat_generic().
    """
    item_names = esx_vsphere_counters_get_item_mapping(parsed).values()
    rows = [[None, item_name] for item_name in item_names]
    return inventory_diskstat_generic(rows)
def check_esx_vsphere_counters_datastoreio(item, params, parsed):
    """Check the IO of the datastores of an ESX host.

    For every datastore a dictionary with the keys read_throughput,
    write_throughput, read_ios, write_ios and latency is built (only for the
    counters that are present) and the result is passed to
    check_diskstat_dict().

    Raises MKCounterWrapped when the ``datastore.read`` counter is missing.
    """
    if "datastore.read" not in parsed:
        raise MKCounterWrapped("Counter data is missing")

    def averaged(samples):
        # Integer part of the mean of all samples
        return int(esx_vsphere_get_average(samples))

    def peak(samples):
        # Highest single sample
        return max(map(int, samples))

    # (key in the result, evaluation of the samples, counter name, scaling)
    fields = [
        ("read_throughput", averaged, "datastore.read", 1024),
        ("write_throughput", averaged, "datastore.write", 1024),
        ("read_ios", averaged, "datastore.datastoreReadIops", 1),
        ("write_ios", averaged, "datastore.datastoreWriteIops", 1),
        ("latency", peak, "datastore.sizeNormalizedDatastoreLatency", 1 / 1000000.0),
    ]

    item_mapping = esx_vsphere_counters_get_item_mapping(parsed)
    datastores = {}
    for new_name, eval_function, name, scaling in fields:
        for instance, values in parsed.get(name, {}).items():
            per_datastore = datastores.setdefault(item_mapping[instance], {})
            per_datastore[new_name] = eval_function(values[0][0]) * scaling

    return check_diskstat_dict(item, params, datastores)
# Registration of the main check (datastore IO). It also carries the parse
# function of the section.
check_info['esx_vsphere_counters'] = {
    'service_description': 'Datastore IO %s',
    'parse_function': parse_esx_vsphere_counters,
    'inventory_function': inventory_esx_vsphere_counters_datastoreio,
    'check_function': check_esx_vsphere_counters_datastoreio,
    'has_perfdata': True,
    'group': 'diskstat',
    'includes': ['diskstat.include'],
}
210 # .--Interfaces----------------------------------------------------------.
211 # | ___ _ __ |
212 # | |_ _|_ __ | |_ ___ _ __ / _| __ _ ___ ___ ___ |
213 # | | || '_ \| __/ _ \ '__| |_ / _` |/ __/ _ \/ __| |
214 # | | || | | | || __/ | | _| (_| | (_| __/\__ \ |
215 # | |___|_| |_|\__\___|_| |_| \__,_|\___\___||___/ |
216 # | |
217 # '----------------------------------------------------------------------'
219 # The bad thing here: ESX does not send *counters* but *rates*. This might
220 # seem user friendly on the first look, but is really bad at the second. The
221 # sampling rate defaults to 20s and is not aligned with our check rate. Also
222 # a reschedule of the check does not create new data. And: our if.include really
223 # requires counters. In order to use if.include we therefore simulate counters.
def convert_esx_counters_if(parsed):
    """Convert the ``net.*`` counters into a table for the generic if check.

    ESX reports rates, while the interface check works on counters. For
    every interface and every counter listed in ``tableindex`` a counter is
    therefore simulated: the last simulated value and its timestamp are kept
    via get_item_state()/set_item_state() and the current rate multiplied by
    the elapsed time is added on each call.

    Returns a list with one row of 20 strings per instance found in the
    ``net.*`` counters, sorted by instance name. The entry with the empty
    instance name (the summary) is kept as a row that consists of '0' only.
    """
    this_time = time.time()

    # Collect all net.* counters per interface. The MAC address is taken
    # over as the last reported string, everything else is the integer part
    # of the average of its samples.
    by_item = {}
    for name, instances in parsed.items():
        if not name.startswith("net."):
            continue
        short_name = name[4:]  # counter name without the "net." prefix
        for instance, values in instances.items():
            nic_data = by_item.setdefault(instance, {})
            if name == "net.macaddress":
                nic_data[short_name] = values[0][0][-1]
            else:
                nic_data[short_name] = int(esx_vsphere_get_average(values[0][0]))

    # Example of by_item:
    # {
    #   'vmnic0': {
    #        'broadcastRx': 31,
    #        'broadcastTx': 0,
    #        'bytesRx': 3905, # is in Kilobytes!
    #        'bytesTx': 134,
    #        'droppedRx': 0,
    #        'droppedTx': 0,
    #        'errorsRx': 0,
    #        'errorsTx': 0,
    #        'multicastRx': 5,
    #        'multicastTx': 1,
    #        'packetsRx': 53040,
    #        'packetsTx': 30822,
    #        'received': 3905,
    #        'transmitted': 134,
    #        'unknownProtos': 0,
    #        'usage': 4040,
    #        'macaddress': 'AA:BB:CC:DD:EE:FF',
    #        'state': 2,
    #        'bandwidth': 10000000,
    #    },
    # }

    # Columns of one row of the table that is handed to the if check:
    #    0 ifIndex                 10 ifInErrors
    #    1 ifDescr                 11 ifHCOutOctets
    #    2 ifType                  12 ifHCOutUcastPkts
    #    3 ifHighSpeed             13 ifHCOutMulticastPkts
    #    4 ifOperStatus            14 ifHCOutBroadcastPkts
    #    5 ifHCInOctets            15 ifOutDiscards
    #    6 ifHCInUcastPkts         16 ifOutErrors
    #    7 ifHCInMulticastPkts     17 ifOutQLen
    #    8 ifHCInBroadcastPkts     18 ifAlias
    #    9 ifInDiscards            19 ifPhysAddress

    # This is for mapping the incoming ESX counter data to the table indices
    # above. These can be passed directly to if.
    tableindex = {
        'bytesRx': 5,  # is in Kilobytes!
        'packetsRx': 6,
        'multicastRx': 7,
        'broadcastRx': 8,
        'droppedRx': 9,
        'errorsRx': 10,
        'bytesTx': 11,
        'packetsTx': 12,
        'multicastTx': 13,
        'broadcastTx': 14,
        'droppedTx': 15,
        'errorsTx': 16,
        # Not mapped: received, transmitted, unknownProtos, usage
    }

    converted = []
    for index, name in enumerate(sorted(by_item)):
        entry = ['0'] * 20
        converted.append(entry)
        if not name:
            # Summary entry without interface name: keep the row of zeros
            continue

        nic_data = by_item[name]
        entry[0] = str(index)
        entry[1] = name
        entry[2] = '6'  # Ethernet
        entry[3] = str(nic_data.get("bandwidth", ""))  # empty: speed not known
        entry[4] = str(nic_data.get("state", "1"))
        entry[18] = name  # ifAlias
        if nic_data.get("macaddress"):
            # "AA:BB:..." -> one character per address byte
            entry[19] = "".join(chr(int(x, 16)) for x in nic_data["macaddress"].split(':'))
        else:
            entry[19] = ''  # MAC address not known here

        for ctr_name, ti in tableindex.items():
            ctr_value = nic_data.get(ctr_name, 0)
            if ctr_name.startswith("bytes"):
                ctr_value *= 1024
            countername = "vmnic." + name + "." + ctr_name
            last_state = get_item_state(countername)
            if last_state:
                last_time, last_value = last_state
                new_value = last_value + ((this_time - last_time) * ctr_value)
            else:
                # No previous state: start the simulated counter as if the
                # current rate had been active for the last 60 seconds.
                new_value = ctr_value * 60
            set_item_state(countername, (this_time, new_value))
            entry[ti] = str(int(new_value))

    return converted
def inventory_esx_vsphere_counters_if(parsed):
    """Discover the interfaces found in the ``net.*`` counters.

    The counters are converted with convert_esx_counters_if() and the
    resulting table is passed to inventory_if_common().
    """
    return inventory_if_common(convert_esx_counters_if(parsed))
def check_esx_vsphere_counters_if(item, params, parsed):
    """Check one interface of the ESX host with the generic interface check.

    The counters are converted with convert_esx_counters_if() and the
    resulting table is passed to check_if_common().

    Raises MKCounterWrapped when the ``net.bytesRx`` counter is missing.
    """
    if "net.bytesRx" in parsed:
        if_table = convert_esx_counters_if(parsed)
        return check_if_common(item, params, if_table)
    raise MKCounterWrapped("Counter data is missing")
# Registration of the interface sub check.
check_info['esx_vsphere_counters.if'] = {
    'service_description': 'Interface %s',
    'inventory_function': inventory_esx_vsphere_counters_if,
    'check_function': check_esx_vsphere_counters_if,
    'has_perfdata': True,
    'group': 'if',
    'default_levels_variable': 'if_default_levels',
    'includes': ['if.include'],
}
370 # .--Uptime--------------------------------------------------------------.
371 # | _ _ _ _ |
372 # | | | | |_ __ | |_(_)_ __ ___ ___ |
373 # | | | | | '_ \| __| | '_ ` _ \ / _ \ |
374 # | | |_| | |_) | |_| | | | | | | __/ |
375 # | \___/| .__/ \__|_|_| |_| |_|\___| |
376 # | |_| |
377 # '----------------------------------------------------------------------'
def inventory_esx_vsphere_counters_uptime(parsed):
    """Discover the uptime service (without item) if ``sys.uptime`` exists.

    Returns ``None`` when the counter is not part of the parsed data.
    """
    if "sys.uptime" not in parsed:
        return None
    return [(None, {})]
def check_esx_vsphere_counters_uptime(_no_item, params, parsed):
    """Check the uptime of the ESX host.

    The most recent ``sys.uptime`` sample of the entry with the empty
    instance name is converted to an integer and passed to
    check_uptime_seconds().

    Raises MKCounterWrapped when the counter is missing or negative.
    """
    if "sys.uptime" not in parsed:
        raise MKCounterWrapped("Counter data is missing")

    samples = parsed["sys.uptime"][""][0][0]
    uptime = int(samples[-1])
    if uptime < 0:
        raise MKCounterWrapped("Counter data is corrupt")

    return check_uptime_seconds(params, uptime)
# Registration of the uptime sub check.
check_info['esx_vsphere_counters.uptime'] = {
    'service_description': 'Uptime',
    'inventory_function': inventory_esx_vsphere_counters_uptime,
    'check_function': check_esx_vsphere_counters_uptime,
    'has_perfdata': True,
    'group': 'uptime',
    'includes': ['uptime.include'],
}
404 # .--Ramdisk-------------------------------------------------------------.
405 # | ____ _ _ _ |
406 # | | _ \ __ _ _ __ ___ __| (_)___| | __ |
407 # | | |_) / _` | '_ ` _ \ / _` | / __| |/ / |
408 # | | _ < (_| | | | | | | (_| | \__ \ < |
409 # | |_| \_\__,_|_| |_| |_|\__,_|_|___/_|\_\ |
410 # | |
411 # +----------------------------------------------------------------------+
413 # We assume that all ramdisks have the same size (in mb) on all hosts
414 # -> To get size infos about unknown ramdisks, connect to the ESX host via
415 # SSH and check the size of the disk via "du" command
# Known ramdisks and their sizes in MB, keyed by the ramdisk name.
# Ramdisks that are not listed here are reported as "Unhandled ramdisk" by
# the check function.
esx_vsphere_counters_ramdisk_sizes = dict(
    root=32,
    etc=28,
    tmp=192,
    hostdstats=319,
    snmptraps=1,
    upgradescratch=300,
    ibmscratch=300,
    sfcbtickets=1,
)
def inventory_esx_vsphere_counters_ramdisk(parsed):
    """Discover the ramdisks of the ESX host.

    A ramdisk is every instance of ``sys.resourceMemConsumed`` whose name
    starts with 'host/system/kernel/kmanaged/visorfs/'. The last path
    component is used as the name, and the names are passed to
    df_inventory().
    """
    visorfs_prefix = 'host/system/kernel/kmanaged/visorfs/'
    ramdisks = [
        instance.split('/')[-1]
        for instance in parsed.get("sys.resourceMemConsumed", {})
        if instance.startswith(visorfs_prefix)
    ]
    return df_inventory(ramdisks)
def check_esx_vsphere_counters_ramdisk(item, params, parsed):
    """Check the usage of one ramdisk of the ESX host.

    For every ramdisk with a known size (esx_vsphere_counters_ramdisk_sizes)
    a tuple (name, size_mb, avail_mb, 0) is built, and the list is passed to
    df_check_filesystem_list(). The used space is the most recent
    ``sys.resourceMemConsumed`` sample divided by 1000.

    Returns state 3 when the requested item is a ramdisk of unknown size.
    Raises MKCounterWrapped when the counter is missing.
    """
    if "sys.resourceMemConsumed" not in parsed:
        raise MKCounterWrapped("Counter data is missing")

    visorfs_prefix = 'host/system/kernel/kmanaged/visorfs/'
    consumed = parsed["sys.resourceMemConsumed"]

    ramdisks = []
    for instance in consumed:
        if not instance.startswith(visorfs_prefix):
            continue

        name = instance.split('/')[-1]
        if name not in esx_vsphere_counters_ramdisk_sizes:
            if item == name:
                return 3, 'Unhandled ramdisk found (%s)' % name
            # Unknown ramdisks other than the requested one are ignored
            continue

        size_mb = esx_vsphere_counters_ramdisk_sizes[name]
        used_mb = float(consumed[instance][0][0][-1]) / 1000
        ramdisks.append((name, size_mb, size_mb - used_mb, 0))

    return df_check_filesystem_list(item, params, ramdisks)
460 check_info['esx_vsphere_counters.ramdisk'] = {
461 'inventory_function': inventory_esx_vsphere_counters_ramdisk,
462 'check_function': check_esx_vsphere_counters_ramdisk,
463 'service_description': 'Ramdisk %s',
464 'has_perfdata': True,
465 'includes': ['size_trend.include', 'df.include'],
466 'group': 'filesystem',
467 'default_levels_variable': 'filesystem_default_levels',