Cleanup config.nodes_of
[check_mk.git] / checks / nvidia
blob 101b1a4990b7fe17389b461d75f08ed50b6a7dc1
1 #!/usr/bin/python
2 # -*- encoding: utf-8; py-indent-offset: 4 -*-
3 # +------------------------------------------------------------------+
4 # | ____ _ _ __ __ _ __ |
5 # | / ___| |__ ___ ___| | __ | \/ | |/ / |
6 # | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
7 # | | |___| | | | __/ (__| < | | | | . \ |
8 # | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
9 # | |
10 # | Copyright Mathias Kettner 2014 mk@mathias-kettner.de |
11 # +------------------------------------------------------------------+
13 # This file is part of Check_MK.
14 # The official homepage is at http://mathias-kettner.de/check_mk.
16 # check_mk is free software; you can redistribute it and/or modify it
17 # under the terms of the GNU General Public License as published by
18 # the Free Software Foundation in version 2. check_mk is distributed
19 # in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
20 # out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
21 # PARTICULAR PURPOSE. See the GNU General Public License for more de-
22 # tails. You should have received a copy of the GNU General Public
23 # License along with GNU Make; see the file COPYING. If not, write
24 # to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
25 # Boston, MA 02110-1301 USA.
# Default parameters for the "nvidia.temp" check (referenced by name through
# 'default_levels_variable' in its check_info entry).
# NOTE(review): "levels" is presumably a (warn, crit) pair interpreted by
# check_temperature -- confirm against temperature.include.
factory_settings["nvidia_temp_default_levels"] = {"levels": (60, 65)}

# Default parameters for the "nvidia.temp_core" check; the GPU core gets
# higher thresholds than the other sensors.
factory_settings["nvidia_temp_core_default_levels"] = {"levels": (90, 95)}
def format_nvidia_name(identifier):
    """Build the service item name for an NVIDIA temperature sensor.

    Every occurrence of the substring "Temp" is removed from *identifier*
    (case-sensitive). If what remains is exactly "GPUCore" the item is
    "GPU NVIDIA"; otherwise it is "System NVIDIA <remainder>".
    """
    sensor = identifier.replace("Temp", "")
    if sensor == "GPUCore":
        return "GPU NVIDIA"

    # afaik temperature sensors can be GPU or Board, maybe memory
    return "System NVIDIA %s" % sensor
def inventory_nvidia_temp(core, info):
    """Yield (item, {}) for each matching temperature sensor in *info*.

    core -- True: yield only the sensor named "GPUCoreTemp";
            False: yield every other sensor whose name ends in "temp"
            (compared case-insensitively).
    info -- list of lines, each a list of fields; the first field is the
            sensor name, with any leading/trailing colons ignored.

    The item is produced by format_nvidia_name(); the parameters are always
    an empty dict.
    """
    for line in info:
        if not line:
            # An empty line has no sensor name to look at.
            continue

        sensor = line[0].strip(":")
        if not sensor.lower().endswith("temp"):
            continue

        # Select either the core sensor or everything but the core sensor,
        # depending on which subcheck is doing the discovery.
        if core == (sensor == "GPUCoreTemp"):
            yield format_nvidia_name(sensor), {}
def check_nvidia_temp(item, params, info):
    """Evaluate the temperature reading of the sensor identified by *item*.

    item   -- item name as produced by format_nvidia_name(), or the raw
              sensor name (without colons) for services discovered before
              the names were reformatted.
    params -- check parameters, handed through to check_temperature().
    info   -- list of lines, each a list of fields: sensor name, reading.

    Returns the result of check_temperature() for the first matching line,
    called with the reading converted to int, *params* and the string
    "nvidia_<item>". Returns None when no line matches.
    """
    for line in info:
        if len(line) < 2:
            # No reading on this line, so there is nothing to evaluate.
            continue

        sensor = line[0].strip(":")
        if format_nvidia_name(sensor) == item \
           or item == sensor:  # compatibility code for "old discovered" services
            return check_temperature(int(line[1]), params, "nvidia_%s" % item)
# Registration of the "nvidia.temp" subcheck: all temperature sensors except
# the GPU core.
check_info["nvidia.temp"] = {
    'check_function': check_nvidia_temp,
    # core=False: discover every "...temp" sensor that is not "GPUCoreTemp".
    'inventory_function': lambda info: inventory_nvidia_temp(False, info),
    'service_description': 'Temperature %s',
    'has_perfdata': True,
    # NOTE(review): presumably the include file that provides
    # check_temperature() -- confirm.
    'includes': ['temperature.include'],
    'group': 'temperature',
    'default_levels_variable': 'nvidia_temp_default_levels',
}
# Registration of the "nvidia.temp_core" subcheck: the GPU core sensor only.
check_info["nvidia.temp_core"] = {
    'check_function': check_nvidia_temp,
    # core=True: discover only the sensor named "GPUCoreTemp".
    'inventory_function': lambda info: inventory_nvidia_temp(True, info),
    'service_description': 'Temperature %s',
    'has_perfdata': True,
    # NOTE(review): presumably the include file that provides
    # check_temperature() -- confirm.
    'includes': ['temperature.include'],
    'group': 'temperature',
    'default_levels_variable': 'nvidia_temp_core_default_levels',
}
def inventory_nvidia_errors(info):
    """Discover the single, item-less GPU error service.

    info -- list of lines, each a list of fields.

    Returns [(None, None)] as soon as a line whose first field is
    'GPUErrors:' is found, and None when *info* contains no such line.
    """
    for line in info:
        # Empty lines cannot carry the marker; skip them instead of failing
        # on line[0].
        if line and line[0] == 'GPUErrors:':
            return [(None, None)]
    return None
def check_nvidia_errors(_no_item, _no_params, info):
    """Report the GPU error count found in *info*.

    _no_item, _no_params -- unused; the service has neither item nor
                            parameters.
    info -- list of lines, each a list of fields; the relevant line has
            'GPUErrors:' as first field and the error count as second.

    Returns a (state, text) tuple:
      (0, "No GPU errors")                 if the count is 0,
      (2, "<n> GPU errors")                for any other count,
      (3, "incomplete output from agent")  if no 'GPUErrors:' line with a
                                           value is present.

    Raises ValueError if the count field is not an integer.
    """
    for line in info:
        # A 'GPUErrors:' line without a value is treated like a missing
        # line, so it ends up as "incomplete output" below.
        if len(line) < 2 or line[0] != "GPUErrors:":
            continue

        errors = int(line[1])
        if errors == 0:
            return (0, "No GPU errors")
        return (2, "%d GPU errors" % errors)

    return (3, "incomplete output from agent")
93 check_info["nvidia.errors"] = {
94 'check_function': check_nvidia_errors,
95 'inventory_function': inventory_nvidia_errors,
96 'service_description': 'NVIDIA GPU Errors',
97 'group': 'hw_errors',