2 # -*- encoding: utf-8; py-indent-offset: 4 -*-
3 # +------------------------------------------------------------------+
4 # | ____ _ _ __ __ _ __ |
5 # | / ___| |__ ___ ___| | __ | \/ | |/ / |
6 # | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
7 # | | |___| | | | __/ (__| < | | | | . \ |
8 # | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
10 # | Copyright Mathias Kettner 2017 mk@mathias-kettner.de |
11 # +------------------------------------------------------------------+
13 # This file is part of Check_MK.
14 # The official homepage is at http://mathias-kettner.de/check_mk.
16 # check_mk is free software; you can redistribute it and/or modify it
17 # under the terms of the GNU General Public License as published by
18 # the Free Software Foundation in version 2. check_mk is distributed
19 # in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
20 # out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
21 # PARTICULAR PURPOSE. See the GNU General Public License for more de-
22 # tails. You should have received a copy of the GNU General Public
23 # License along with GNU Make; see the file COPYING. If not, write
24 # to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
25 # Boston, MA 02110-1301 USA.
def parse_ceph_status(info):
    """Decode the agent section into the JSON document it carries.

    ``info`` is iterated line by line; every line is a sequence of strings
    that is re-joined with single blanks. The lines themselves are
    concatenated without a separator and the result is handed to
    ``json.loads``.

    Returns whatever ``json.loads`` produces for that text. Raises
    ``ValueError`` (from ``json.loads``) if the text is not valid JSON,
    which includes an empty section.
    """
    # Local import keeps the block self-contained regardless of what the
    # surrounding file imports at module level.
    import json

    # Words inside one line were separated by whitespace; consecutive lines
    # are glued together directly.
    return json.loads("".join(" ".join(line) for line in info))
def ceph_check_epoch(_id, epoch, params):
    """Rate an epoch counter against optional warn/crit levels.

    _id    -- prefix for the two counter names ("<_id>.epoch.rate" and
              "<_id>.epoch.avg") passed to get_rate / get_average.
    epoch  -- current epoch value taken from the Ceph status document.
    params -- check parameters; the optional "epoch" entry is a tuple
              (warn, crit, avg_interval_min). Without it no levels are
              applied and an interval of 1 is used.

    Returns a (state, infotext) tuple. The state is 2 when the averaged
    value reaches crit, 1 when it reaches warn, otherwise 0.
    """
    warn, crit, avg_interval_min = params.get("epoch", (None, None, 1))

    # get_rate / get_average are defined outside this file; presumably they
    # turn the counter into a rate and smooth it over the interval.
    # NOTE(review): avg_interval_min is passed as the fourth positional
    # argument of get_rate - confirm that matches get_rate's signature.
    epoch_rate = get_rate("%s.epoch.rate" % _id, time.time(), epoch, avg_interval_min)
    epoch_avg = get_average("%s.epoch.avg" % _id, time.time(), epoch_rate, avg_interval_min)

    state = 0
    infotext = 'Epoch: %s/%s' % (epoch_avg, get_age_human_readable(avg_interval_min * 60))

    if warn is not None and crit is not None:
        if epoch_avg >= crit:
            state = 2
        elif epoch_avg >= warn:
            state = 1

        # Only mention the levels when one of them was actually reached.
        if state > 0:
            infotext += " (warn/crit at %.1f/%.1f)" % (warn, crit)

    return state, infotext
55 # .--status--------------------------------------------------------------.
57 # | ___| |_ __ _| |_ _ _ ___ |
58 # | / __| __/ _` | __| | | / __| |
59 # | \__ \ || (_| | |_| |_| \__ \ |
60 # | |___/\__\__,_|\__|\__,_|___/ |
62 # '----------------------------------------------------------------------'
64 # Suggested by customer: 1,3 per 30 min
# Default parameters of the "Ceph Status" check.
# 'epoch' is (warn, crit, averaging interval in minutes).
# NOTE(review): the tuple is rebuilt from the "1,3 per 30 min" remark that
# precedes this assignment - confirm against the deployed defaults.
factory_settings['ceph_status_default_levels'] = {
    'epoch': (1, 3, 30),
}
def inventory_ceph_status(parsed):
    """Discover the single, item-less "Ceph Status" service.

    Returns one (item, parameters) pair with item ``None`` and an empty
    parameter dict.
    NOTE(review): the original body was not recoverable; this mirrors the
    item-less signature of check_ceph_status - confirm.
    """
    return [(None, {})]
# TODO: general status -> ceph health (allow exceptions for "too many PGs per OSD" as an option)
def check_ceph_status(_no_item, params, parsed):
    """Report the cluster health state and the election epoch.

    _no_item -- unused; the service has no item.
    params   -- check parameters, forwarded to ceph_check_epoch.
    parsed   -- decoded Ceph status document; reads parsed["health"] and
                parsed["election_epoch"].

    Yields two (state, infotext) results: the mapped health state, then the
    result of ceph_check_epoch for the election epoch. A health value that
    is not in the mapping is reported as state 3 with the raw value shown.
    """
    map_health_states = {
        "HEALTH_OK": (0, "OK"),
        "HEALTH_WARN": (1, "warning"),
        "HEALTH_CRIT": (2, "critical"),
        "HEALTH_ERR": (2, "error"),
    }

    health = parsed["health"]
    # Newer Ceph releases report the health under "status" and deprecate
    # "overall_status"; prefer the former and fall back to the latter so
    # older output keeps working. A document with neither still raises
    # KeyError as before.
    overall_status = health.get("status") or health["overall_status"]

    state, state_readable = map_health_states.get(
        overall_status, (3, "unknown[%s]" % overall_status))
    yield state, 'Status: %s' % state_readable
    yield ceph_check_epoch("ceph_status", parsed["election_epoch"], params)
# Registration of the main "Ceph Status" check: wires the parse, inventory
# and check functions to the service and names its default parameters.
check_info['ceph_status'] = {
    'parse_function': parse_ceph_status,
    'inventory_function': inventory_ceph_status,
    'check_function': check_ceph_status,
    'service_description': 'Ceph Status',
    'default_levels_variable': 'ceph_status_default_levels',
}
99 # .--osds----------------------------------------------------------------.
101 # | ___ ___ __| |___ |
102 # | / _ \/ __|/ _` / __| |
103 # | | (_) \__ \ (_| \__ \ |
104 # | \___/|___/\__,_|___/ |
106 # '----------------------------------------------------------------------'
108 # Suggested by customer: 50, 100 per 15 min
# Default parameters of the "Ceph OSDs" check.
# 'epoch' is (warn, crit, averaging interval in minutes); the two OSD
# entries are (warn, crit) percentages of out / down OSDs.
# NOTE(review): for both OSD entries warn (7.0) is above crit (5.0), so the
# check turns critical at 5% and never reports a warning - confirm whether
# (5.0, 7.0) was intended. Values are left untouched here.
factory_settings['ceph_osds_default_levels'] = {
    'epoch': (50, 100, 15),
    'num_out_osds': (7.0, 5.0),
    'num_down_osds': (7.0, 5.0),
}
def inventory_ceph_status_osds(parsed):
    """Discover the item-less "Ceph OSDs" service when an OSD map is present.

    Returns one (None, {}) pair if ``parsed`` has an "osdmap" key; otherwise
    falls through and returns None (nothing discovered).
    NOTE(review): the returned value was not recoverable from the damaged
    source and is inferred from the item-less check signature - confirm.
    """
    if "osdmap" in parsed:
        return [(None, {})]
def check_ceph_status_osds(_no_item, params, parsed):
    """Report OSD map epoch, full / near-full flags and out / down OSDs.

    _no_item -- unused; the service has no item.
    params   -- check parameters; reads "epoch" (via ceph_check_epoch),
                "num_out_osds" and "num_down_osds" ((warn, crit) in percent).
    parsed   -- decoded Ceph status document; reads
                parsed["osdmap"]["osdmap"].

    Yields (state, infotext) results in this order: the epoch result, one
    result per set full / near-full flag, a summary line, and one result
    each for the OSDs that are out and down.
    """
    data = parsed["osdmap"]["osdmap"]
    num_osds = int(data["num_osds"])
    yield ceph_check_epoch("ceph_osds", data["epoch"], params)

    # NOTE(review): the ('full', 'Full', 2) row and the lookup below were
    # lost in the damaged source and are restored by analogy with the
    # surviving 'nearfull' row - confirm.
    for ds, title, state in [
        ('full', 'Full', 2),
        ('nearfull', 'Near full', 1),
    ]:
        ds_value = data.get(ds)
        if not ds_value:
            continue
        if isinstance(ds_value, (list, tuple)):
            yield state, "%s: %s" % (title, ", ".join(ds_value))
        else:
            # A bare flag (e.g. JSON true) has no members to list; joining
            # it would raise a TypeError exactly when the cluster is full.
            yield state, title

    yield 0, "OSDs: %s, Remapped PGs: %s" % (num_osds, data["num_remapped_pgs"])

    for ds, title, param_key in [
        ('num_in_osds', 'OSDs out', 'num_out_osds'),
        ('num_up_osds', 'OSDs down', 'num_down_osds'),
    ]:
        state = 0
        # The document counts OSDs that are in / up; the check reports the
        # complement.
        value = num_osds - int(data[ds])
        # A cluster without OSDs must not crash the check.
        value_perc = 100 * float(value) / num_osds if num_osds else 0.0
        infotext = "%s: %s, %s" % (title, value, get_percent_human_readable(value_perc))

        if params.get(param_key):
            warn, crit = params[param_key]
            if value_perc >= crit:
                state = 2
            elif value_perc >= warn:
                # Previously compared against crit a second time, which made
                # the warning state unreachable.
                state = 1

            if state > 0:
                infotext += " (warn/crit at %s/%s)" % \
                            (get_percent_human_readable(warn),
                             get_percent_human_readable(crit))

        yield state, infotext
# Registration of the "Ceph OSDs" subcheck; it reuses the parsed section of
# the main check and has its own default parameters and rule group.
check_info['ceph_status.osds'] = {
    'inventory_function': inventory_ceph_status_osds,
    'check_function': check_ceph_status_osds,
    'service_description': 'Ceph OSDs',
    'default_levels_variable': 'ceph_osds_default_levels',
    'group': 'ceph_osds',
}
167 # .--pgs-----------------------------------------------------------------.
170 # | | '_ \ / _` / __| |
171 # | | |_) | (_| \__ \ |
172 # | | .__/ \__, |___/ |
174 # '----------------------------------------------------------------------'
def inventory_ceph_status_pgs(parsed):
    """Discover the item-less "Ceph PGs" service when a PG map is present.

    Returns one (None, {}) pair if ``parsed`` has a "pgmap" key; otherwise
    falls through and returns None (nothing discovered).
    NOTE(review): the returned value was not recoverable from the damaged
    source and is inferred from the item-less check signature - confirm.
    """
    if "pgmap" in parsed:
        return [(None, {})]
def check_ceph_status_pgs(_no_item, params, parsed):
    """Summarise placement groups by state and rate each state.

    _no_item -- unused; the service has no item.
    params   -- unused.
    parsed   -- decoded Ceph status document; reads parsed["pgmap"] with its
                "num_pgs" and "pgs_by_state" entries.

    Every entry of "pgs_by_state" has a "+"-separated "state_name" and a
    "count". Each part of the name is looked up in the severity table; an
    unlisted part counts as state 3 and is shown as UNKNOWN[<name>].

    Returns a (state, infotext) tuple where state is the maximum of all
    looked-up states (0 if there are none).
    """
    # Suggested by customer
    map_pg_states = {
        "active": (0, "active"),
        "backfill": (0, "backfill"),
        "backfill_wait": (1, "backfill wait"),
        "backfilling": (1, "backfilling"),
        "backfill_toofull": (0, "backfill too full"),
        "clean": (0, "clean"),
        "creating": (0, "creating"),
        "degraded": (1, "degraded"),
        # NOTE(review): two rows were lost here in the damaged source;
        # "down" and "deep" are restored from the Ceph PG state list -
        # confirm the severities.
        "down": (2, "down"),
        "deep": (0, "deep"),
        "incomplete": (2, "incomplete"),
        "inconsistent": (2, "inconsistent"),
        "peered": (2, "peered"),
        # Key was misspelled "perring", so peering PGs showed up as UNKNOWN.
        "peering": (0, "peering"),
        "recovering": (0, "recovering"),
        "recovery_wait": (0, "recovery wait"),
        "remapped": (0, "remapped"),
        "repair": (0, "repair"),
        "replay": (1, "replay"),
        "scrubbing": (0, "scrubbing"),
        "stale": (2, "stale"),
        "undersized": (0, "undersized"),
        "wait_backfill": (0, "wait backfill"),
    }

    data = parsed["pgmap"]
    num_pgs = data["num_pgs"]
    pgs_info = "PGs: %s" % num_pgs

    # Seeded with 0 so that max() below also works without any state entry.
    states = [0]
    infotexts = [pgs_info]

    for pgs_by_state in data["pgs_by_state"]:
        statetexts = []
        for status in pgs_by_state["state_name"].split("+"):
            state, state_readable = map_pg_states.get(status, (3, "UNKNOWN[%s]" % status))
            states.append(state)
            # state_markers is provided outside this file; it is indexed by
            # the numeric state to decorate the readable name.
            statetexts.append("%s%s" % (state_readable, state_markers[state]))
        infotexts.append("Status '%s': %s" % ("+".join(statetexts), pgs_by_state["count"]))

    # Joining one list avoids a dangling ", " when no per-state entry exists.
    return max(states), ", ".join(infotexts)
# Registration of the "Ceph PGs" subcheck; it reuses the parsed section of
# the main check and declares no default parameters.
check_info['ceph_status.pgs'] = {
    'inventory_function': inventory_ceph_status_pgs,
    'check_function': check_ceph_status_pgs,
    'service_description': 'Ceph PGs',
}
234 # .--mgrs----------------------------------------------------------------.
236 # | _ __ ___ __ _ _ __ ___ |
237 # | | '_ ` _ \ / _` | '__/ __| |
238 # | | | | | | | (_| | | \__ \ |
239 # | |_| |_| |_|\__, |_| |___/ |
241 # '----------------------------------------------------------------------'
243 # Suggested by customer: 1, 2 per 5 min
# Default parameters of the "Ceph MGRs" check.
# 'epoch' is (warn, crit, averaging interval in minutes).
# NOTE(review): the tuple is rebuilt from the "1, 2 per 5 min" remark that
# precedes this assignment - confirm against the deployed defaults.
factory_settings['ceph_mgrs_default_levels'] = {
    'epoch': (1, 2, 5),
}
def inventory_ceph_status_mgrs(parsed):
    """Discover the item-less "Ceph MGRs" service when a manager map is present.

    Returns one (None, {}) pair if ``parsed`` has a "mgrmap" key; otherwise
    falls through and returns None (nothing discovered).
    NOTE(review): the returned value was not recoverable from the damaged
    source and is inferred from the item-less check signature - confirm.
    """
    if "mgrmap" in parsed:
        return [(None, {})]
def check_ceph_status_mgrs(_no_item, params, parsed):
    """Rate the epoch of the manager map.

    _no_item -- unused; the service has no item.
    params   -- check parameters, forwarded to ceph_check_epoch.
    parsed   -- decoded Ceph status document; reads parsed["mgrmap"]["epoch"].

    Yields the single (state, infotext) result of ceph_check_epoch.
    """
    data = parsed["mgrmap"]
    yield ceph_check_epoch("ceph_mgrs", data["epoch"], params)
# Registration of the "Ceph MGRs" subcheck; it reuses the parsed section of
# the main check and has its own default parameters and rule group.
check_info['ceph_status.mgrs'] = {
    'inventory_function': inventory_ceph_status_mgrs,
    'check_function': check_ceph_status_mgrs,
    'service_description': 'Ceph MGRs',
    'default_levels_variable': 'ceph_mgrs_default_levels',
    'group': 'ceph_mgrs',
}