Cleanup config.nodes_of
[check_mk.git] / checks / mssql_counters
blob01c0d1f0971c185e92120f5a4936081e5245dfbc
1 #!/usr/bin/python
2 # -*- encoding: utf-8; py-indent-offset: 4 -*-
3 # +------------------------------------------------------------------+
4 # | ____ _ _ __ __ _ __ |
5 # | / ___| |__ ___ ___| | __ | \/ | |/ / |
6 # | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
7 # | | |___| | | | __/ (__| < | | | | . \ |
8 # | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
9 # | |
10 # | Copyright Mathias Kettner 2014 mk@mathias-kettner.de |
11 # +------------------------------------------------------------------+
13 # This file is part of Check_MK.
14 # The official homepage is at http://mathias-kettner.de/check_mk.
16 # check_mk is free software; you can redistribute it and/or modify it
17 # under the terms of the GNU General Public License as published by
18 # the Free Software Foundation in version 2. check_mk is distributed
19 # in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
20 # out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
21 # PARTICULAR PURPOSE. See the GNU General Public License for more de-
22 # tails. You should have received a copy of the GNU General Public
23 # License along with GNU Make; see the file COPYING. If not, write
24 # to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
25 # Boston, MA 02110-1301 USA.
27 # There are different types of information. Can we handle them in a
28 # general way? There are:
29 # - Percentage values
30 # - Size values in KB
31 # - Counters
32 # - Rate counters (per second)
34 # <<<mssql_counters>>>
35 # MSSQL_SQLEXPRESS:Buffer_Manager Buffer_cache_hit_ratio 12
36 # MSSQL_SQLEXPRESS:Databases master Data_File(s)_Size_(KB) 2304
37 # MSSQL_SQLEXPRESS:Databases master Transactions/sec 13733
38 # MSSQL_SQLEXPRESS:Databases master Percent_Log_Used 57
39 # MSSQL_SQLEXPRESS:Databases master Log_File(s)_Size_(KB)
40 # FOOBAR 170
# Discovery rule set of the mssql_counters checks. It is evaluated per host
# in inventory_mssql_counters_cache_hits, which reads the key
# 'add_zero_based_services' from the merged rule values.
inventory_mssql_counters_rules = []
44 #TODO if not counters: raise
def inventory_mssql_counters_generic(parsed, want_counters, dflt=None):
    """Discover one service per (object, instance) reporting a wanted counter.

    parsed:        {(obj, instance): {node_name: {counter: value}}} as built
                   by parse_mssql_counters_base.
    want_counters: iterable of counter names; one match is sufficient.
    dflt:          discovery parameters yielded with every item.

    Yields ("<obj> <instance>", dflt) tuples.
    """
    want_counters = set(want_counters)
    for (obj, instance), node_data in parsed.items():
        # On a cluster the same object is reported once per node. The item
        # does not contain the node name, so it must be yielded only once.
        if any(want_counters.intersection(counters) for counters in node_data.values()):
            yield "%s %s" % (obj, instance), dflt
56 # .--main----------------------------------------------------------------.
57 # | _ |
58 # | _ __ ___ __ _(_)_ __ |
59 # | | '_ ` _ \ / _` | | '_ \ |
60 # | | | | | | | (_| | | | | | |
61 # | |_| |_| |_|\__,_|_|_| |_| |
62 # | |
63 # +----------------------------------------------------------------------+
64 # | |
65 # '----------------------------------------------------------------------'
67 # Previously there was no main check, but we need it because
68 # the sub checks
69 # - mssql_counters.transactions
70 # - mssql_counters.pageactivity
71 # - mssql_counters.locks
72 # will become cluster aware and thus all subchecks, too, because they share
73 # the same section. This main check is just a dummy with the benefit of the
74 # parse function.
77 def parse_mssql_counters_base(info):
78 parsed = {}
79 for line in info:
80 if len(line) < 4 or line[-1].startswith("ERROR: "):
81 continue
83 (node_name, obj, counter, instance), values = line[:4], line[4:]
85 if obj.endswith(':Databases'):
86 obj = obj[:-10]
88 if len(values) == 1:
89 values = values[0]
90 try:
91 values = float(values)
92 except ValueError:
93 try:
94 values = int(values)
95 except ValueError:
96 pass
98 if counter == "utc_time":
99 # mssql returns localized format. great! let's try ...
100 try:
101 # ... iso 8601
102 values = utc_mktime(
103 time.strptime(" ".join(values).split(".")[0], "%Y-%m-%d %H:%M:%S"))
104 except ValueError:
105 try:
106 # ... german
107 values = utc_mktime(time.strptime(" ".join(values), "%d.%m.%Y %H:%M:%S"))
108 except ValueError:
109 pass
111 data = parsed.setdefault((obj, instance), {}).setdefault(node_name, {})
112 data.setdefault(counter, values)
113 return parsed
def inventory_mssql_counters_base(_parsed):
    """Discover nothing: the main check only exists to carry the parse function."""
    return None
def check_mssql_counters_base(_item, _params, _parsed):
    """Placeholder check function; always reports state 3 with a fixed text."""
    state = 3
    return state, 'Not implemented check plugin'
# Main check of the section. Its inventory function discovers nothing and its
# check function is a stub. It is registered so that the sub checks share
# parse_mssql_counters_base.
check_info['mssql_counters'] = {
    'parse_function': parse_mssql_counters_base,
    'inventory_function': inventory_mssql_counters_base,
    'check_function': check_mssql_counters_base,
    'service_description': "MSSQL",
    'node_info': True,
}
133 # .--Percentage based values---------------------------------------------.
134 # | ____ _ |
135 # | | _ \ ___ _ __ ___ ___ _ __ | |_ __ _ __ _ ___ |
136 # | | |_) / _ \ '__/ __/ _ \ '_ \| __/ _` |/ _` |/ _ \ |
137 # | | __/ __/ | | (_| __/ | | | || (_| | (_| | __/ |
138 # | |_| \___|_| \___\___|_| |_|\__\__,_|\__, |\___| |
139 # | |___/ |
140 # | _ _ _ |
141 # | | |__ __ _ ___ ___ __| | __ ____ _| |_ _ ___ ___ |
142 # | | '_ \ / _` / __|/ _ \/ _` | \ \ / / _` | | | | |/ _ \/ __| |
143 # | | |_) | (_| \__ \ __/ (_| | \ V / (_| | | |_| | __/\__ \ |
144 # | |_.__/ \__,_|___/\___|\__,_| \_/ \__,_|_|\__,_|\___||___/ |
145 # | |
146 # '----------------------------------------------------------------------'
def inventory_mssql_counters_cache_hits(parsed, want_counters):
    """Discover one service per wanted ratio counter.

    A counter whose '<counter>_base' companion is missing or zero is only
    discovered when the host's merged inventory_mssql_counters_rules set
    'add_zero_based_services'.

    Yields ("<obj> <instance> <counter>", None) tuples.
    """
    rule_values = host_extra_conf_merged(host_name(), inventory_mssql_counters_rules)
    include_zero_based = rule_values.get('add_zero_based_services', False)

    for (obj, instance), node_data in parsed.items():
        for counters in node_data.values():
            for counter in counters:
                if counter not in want_counters:
                    continue

                has_nonzero_base = counters.get('%s_base' % counter, 0.0) != 0.0
                if has_nonzero_base or include_zero_based:
                    yield "%s %s %s" % (obj, instance, counter), None
def check_mssql_counters_cache_hits(item, params, parsed):
    """Report a ratio counter as a percentage of its '<counter>_base' value.

    item:   "<obj> <instance> <counter>".
    params: falsy for no levels, otherwise a (warn, crit) pair of lower
            levels on the percentage.

    Yields (state, infotext, perfdata) once per node. Raises MKCounterWrapped
    if a node does not report the counter.
    """
    obj, instance, counter = item.split()
    base_key = "%s_base" % counter

    for node_name, counters in parsed.get((obj, instance), {}).items():
        value = counters.get(counter)
        base = counters.get(base_key, 0)

        if value is None or base is None:
            # Assume general connection problem to the database, which is reported
            # by the "X Instance" service and skip this check.
            raise MKCounterWrapped("Failed to connect to database")

        # A zero base would divide by zero; treat it as 1 instead.
        divisor = 1 if base == 0 else base
        perc = 100.0 * value / divisor

        node_info = "[%s] " % node_name if node_name else ""
        infotext = "%s%s" % (node_info, get_percent_human_readable(perc))

        state = 0
        if params:
            #TODO: Previously params=None(=dflt) in inventory_mssql_counters
            warn, crit = params
            if perc <= crit:
                state = 2
            elif perc <= warn:
                state = 1
            if state != 0:
                infotext += " (warn/crit below %s/%s)" % (warn, crit)

        yield state, infotext, [(counter, perc)]
# Cache hit ratios: one service per discovered ratio counter. The lambda
# binds the list of counter names handed to the discovery function.
check_info['mssql_counters.cache_hits'] = {
    'inventory_function': lambda parsed: inventory_mssql_counters_cache_hits(
        parsed, ['cache_hit_ratio', 'log_cache_hit_ratio', 'buffer_cache_hit_ratio']),
    'check_function': check_mssql_counters_cache_hits,
    'service_description': "MSSQL %s",
    'has_perfdata': True,
    'node_info': True,
}
210 # .--Rates---------------------------------------------------------------.
211 # | ____ _ |
212 # | | _ \ __ _| |_ ___ ___ |
213 # | | |_) / _` | __/ _ \/ __| |
214 # | | _ < (_| | || __/\__ \ |
215 # | |_| \_\__,_|\__\___||___/ |
216 # | |
217 # '----------------------------------------------------------------------'
219 # ---transactions---------------------------------------------------------
def check_mssql_counters_transactions(item, params, parsed):
    """Report the transaction counters of one "<obj> <instance>" item as rates.

    params is not evaluated. Yields (0, infotext, perfdata) for every
    transaction counter a node reports. Raises MKCounterWrapped if no node
    delivered any counter for the item.
    """
    obj, instance = item.split()
    node_data = parsed.get((obj, instance), {})

    if not any(node_data.values()):
        # Assume general connection problem to the database, which is reported
        # by the "X Instance" service and skip this check.
        raise MKCounterWrapped("Failed to connect to database")

    counter_titles = [
        ('transactions/sec', 'Transactions'),
        ('write_transactions/sec', 'Write Transactions'),
        ('tracked_transactions/sec', 'Tracked Transactions'),
    ]

    for node_name, counters in node_data.items():
        # Use the timestamp from the section if present, else the local clock.
        now = counters.get('utc_time')
        if now is None:
            now = time.time()

        # The node prefix is shown on the first result of a node only.
        node_info = "[%s] " % node_name if node_name else ""

        for counter_key, title in counter_titles:
            value = counters.get(counter_key)
            if value is None:
                continue

            rate_key = "mssql_counters.transactions.%s.%s.%s" % (node_name, item, counter_key)
            rate = get_rate(rate_key, now, value)
            infotext = "%s%s: %.1f/s" % (node_info, title, rate)
            node_info = ""
            yield 0, infotext, [(counter_key, rate)]
# Transaction rates: discovered for every (object, instance) that reports at
# least one of the listed counters; no default parameters.
check_info['mssql_counters.transactions'] = {
    'inventory_function': lambda parsed: inventory_mssql_counters_generic(
        parsed, ['transactions/sec', 'write_transactions/sec', 'tracked_transactions/sec']),
    'check_function': check_mssql_counters_transactions,
    'service_description': "MSSQL %s Transactions",
    'has_perfdata': True,
    'node_info': True,
}
265 # ---locks----------------------------------------------------------------
def check_mssql_counters_locks(item, params, parsed):
    """Report the lock counters of one "<obj> <instance>" item as rates.

    params maps a counter name to an upper (warn, crit) pair; counters without
    an entry are not rated. Yields (state, infotext, perfdata) for every lock
    counter a node reports. Raises MKCounterWrapped if no node delivered any
    counter for the item.
    """
    obj, instance = item.split()
    node_data = parsed.get((obj, instance), {})

    if not any(node_data.values()):
        # Assume general connection problem to the database, which is reported
        # by the "X Instance" service and skip this check.
        raise MKCounterWrapped("Failed to connect to database")

    counter_titles = [
        ('lock_requests/sec', 'Requests'),
        ('lock_timeouts/sec', 'Timeouts'),
        ('number_of_deadlocks/sec', 'Deadlocks'),
        ('lock_waits/sec', 'Waits'),
    ]

    for node_name, counters in node_data.items():
        # Use the timestamp from the section if present, else the local clock.
        now = counters.get('utc_time')
        if now is None:
            now = time.time()

        # The node prefix is shown on the first result of a node only.
        node_info = "[%s] " % node_name if node_name else ""

        for counter_key, title in counter_titles:
            value = counters.get(counter_key)
            if value is None:
                continue

            rate_key = "mssql_counters.locks.%s.%s.%s" % (node_name, item, counter_key)
            rate = get_rate(rate_key, now, value)
            infotext = "%s%s: %.1f/s" % (node_info, title, rate)
            node_info = ""

            warn, crit = params.get(counter_key, (None, None))
            if crit is not None and rate >= crit:
                state = 2
            elif warn is not None and rate >= warn:
                state = 1
            else:
                state = 0

            if state != 0:
                infotext += " (warn/crit at %.1f/%.1f per second)" % (warn, crit)

            yield state, infotext, [(counter_key, rate, warn, crit)]
# Lock rates: discovered for every (object, instance) that reports at least
# one of the listed counters, with an empty parameter dictionary as default.
check_info['mssql_counters.locks'] = {
    'inventory_function': lambda parsed: inventory_mssql_counters_generic(
        parsed,
        ['number_of_deadlocks/sec', 'lock_requests/sec', 'lock_timeouts/sec', 'lock_waits/sec'],
        dflt={}),
    'check_function': check_mssql_counters_locks,
    'service_description': "MSSQL %s Locks",
    'has_perfdata': True,
    'group': 'mssql_counters_locks',
    'node_info': True,
}
325 # ---sql stats------------------------------------------------------------
def inventory_mssql_counters_sqlstats(parsed, want_counters, dflt=None):
    """Discover one service per (object, instance, wanted counter).

    parsed:        {(obj, instance): {node_name: {counter: value}}}.
    want_counters: container of counter names to discover.
    dflt:          discovery parameters yielded with every item.

    Yields ("<obj> <instance> <counter>", dflt) tuples.
    """
    seen = set()
    for (obj, instance), node_data in parsed.items():
        for counters in node_data.values():
            for counter in counters:
                if counter not in want_counters:
                    continue

                item = "%s %s %s" % (obj, instance, counter)
                # Several cluster nodes may report the same counter. The item
                # does not contain the node name, so yield it only once.
                if item in seen:
                    continue
                seen.add(item)
                yield item, dflt
def check_mssql_counters_sqlstats(item, params, parsed):
    """Report a single SQL statistics counter as a rate.

    item:   "<obj> <instance> <counter>".
    params: maps the counter name to an upper (warn, crit) pair; without an
            entry the rate is not rated.

    Yields (state, infotext, perfdata) for every node reporting the counter.
    Raises MKCounterWrapped if no node delivered any counter for the item.
    """
    obj, instance, counter = item.split()
    node_data = parsed.get((obj, instance), {})

    if not any(node_data.values()):
        # Assume general connection problem to the database, which is reported
        # by the "X Instance" service and skip this check.
        raise MKCounterWrapped("Failed to connect to database")

    for node_name, counters in node_data.items():
        value = counters.get(counter)
        if value is None:
            # Only this node lacks the counter. The other nodes must still be
            # evaluated, so do not leave the generator here.
            continue

        # Use the timestamp from the section if present, else the local clock.
        now = counters.get('utc_time')
        if now is None:
            now = time.time()

        rate = get_rate("mssql_counters.sqlstats.%s.%s.%s" % (node_name, item, counter), now, value)
        node_info = ""
        if node_name:
            node_info = "[%s] " % node_name
        infotext = "%s%.1f/sec" % (node_info, rate)

        state = 0
        warn, crit = params.get(counter, (None, None))
        if crit is not None and rate >= crit:
            state = 2
        elif warn is not None and rate >= warn:
            state = 1
        if state:
            infotext += " (warn/crit at %.1f/%.1f per second)" % (warn, crit)

        yield state, infotext, [(counter, rate, warn, crit)]
# SQL statistics: one service per listed counter that is present, with an
# empty parameter dictionary as default.
check_info["mssql_counters.sqlstats"] = {
    "inventory_function": lambda parsed: inventory_mssql_counters_sqlstats(
        parsed, ["batch_requests/sec", "sql_compilations/sec", "sql_re-compilations/sec"], dflt={}),
    "check_function": check_mssql_counters_sqlstats,
    "service_description": "MSSQL %s",
    "has_perfdata": True,
    "group": "mssql_stats",
    'node_info': True,
}
383 # ---page activity--------------------------------------------------------
def check_mssql_counters_pageactivity(item, params, parsed):
    """Report the page counters of one "<obj> <instance>" item as rates.

    params maps a counter name to an upper (warn, crit) pair; counters without
    an entry are not rated. Yields (state, infotext, perfdata) for every page
    counter a node reports. Raises MKCounterWrapped if no node delivered any
    counter for the item.
    """
    obj, instance = item.split()
    node_data = parsed.get((obj, instance), {})

    if not any(node_data.values()):
        # Assume general connection problem to the database, which is reported
        # by the "X Instance" service and skip this check.
        raise MKCounterWrapped("Failed to connect to database")

    counter_titles = [
        ("page_reads/sec", "Reads"),
        ("page_writes/sec", "Writes"),
        ("page_lookups/sec", "Lookups"),
    ]

    for node_name, counters in node_data.items():
        # Use the timestamp from the section if present, else the local clock.
        now = counters.get('utc_time')
        if now is None:
            now = time.time()

        # The node prefix is shown on the first result of a node only.
        node_info = "[%s] " % node_name if node_name else ""

        for counter_key, title in counter_titles:
            value = counters.get(counter_key)
            if value is None:
                continue

            rate_key = "mssql_counters.pageactivity.%s.%s.%s" % (node_name, item, counter_key)
            rate = get_rate(rate_key, now, value)
            infotext = "%s%s: %.1f/s" % (node_info, title, rate)
            node_info = ""

            warn, crit = params.get(counter_key, (None, None))
            if crit is not None and rate >= crit:
                state = 2
            elif warn is not None and rate >= warn:
                state = 1
            else:
                state = 0

            if state != 0:
                infotext += " (warn/crit at %.1f/%.1f per second)" % (warn, crit)

            yield state, infotext, [(counter_key, rate, warn, crit)]
# Page activity rates: discovered for every (object, instance) that reports
# at least one of the listed counters, with an empty parameter dictionary as
# default.
check_info['mssql_counters.pageactivity'] = {
    'inventory_function': lambda parsed: inventory_mssql_counters_generic(
        parsed, ["page_reads/sec", "page_writes/sec", "page_lookups/sec"], dflt={}),
    'check_function': check_mssql_counters_pageactivity,
    'service_description': "MSSQL %s Page Activity",
    'has_perfdata': True,
    'group': "mssql_page_activity",
    'node_info': True,
}
440 # ---locks per batch------------------------------------------------------
def inventory_mssql_counters_locks_per_batch(parsed):
    """Discover one service per name prefix that provides both rate counters.

    The prefix is the part of the object name in front of the first ':'.
    A prefix is discovered if at least one node reports 'lock_requests/sec'
    under ("<prefix>:Locks", "_Total") and 'batch_requests/sec' under
    ("<prefix>:SQL_Statistics", "None").

    Yields (prefix, {}) tuples, each prefix once, in sorted order.
    """
    # Many objects share the same prefix; collect every prefix only once.
    db_names = set(obj.split(":")[0] for (obj, _instance) in parsed if ":" in obj)

    for db_name in sorted(db_names):
        # Both lookups need a default: either section may be missing.
        locks_by_node = parsed.get(("%s:Locks" % db_name, "_Total"), {})
        stats_by_node = parsed.get(("%s:SQL_Statistics" % db_name, "None"), {})

        for node_name, lock_counters in locks_by_node.items():
            if "lock_requests/sec" in lock_counters \
               and "batch_requests/sec" in stats_by_node.get(node_name, {}):
                yield db_name, {}
                break
def check_mssql_counters_locks_per_batch(item, params, parsed):
    """Report the ratio of the lock request rate to the batch request rate.

    item:   name prefix, looked up as ("<item>:Locks", "_Total") and
            ("<item>:SQL_Statistics", "None").
    params: may hold an upper (warn, crit) pair under 'locks_per_batch'.

    Yields (state, infotext, perfdata) for every node that reports both
    'lock_requests/sec' and 'batch_requests/sec'; other nodes are skipped.
    Raises MKCounterWrapped if neither section delivered any counter.
    """
    data_locks_data = parsed.get(("%s:Locks" % item, "_Total"), {})
    data_stats_data = parsed.get(("%s:SQL_Statistics" % item, "None"), {})

    if not any(list(data_locks_data.values()) + list(data_stats_data.values())):
        # Assume general connection problem to the database, which is reported
        # by the "X Instance" service and skip this check.
        raise MKCounterWrapped("Failed to connect to database")

    for node_name, data_locks in data_locks_data.items():
        # The ratio needs both counters from the same node. A node missing
        # either one is skipped instead of failing with a KeyError.
        data_stats = data_stats_data.get(node_name)
        if data_stats is None:
            continue

        locks = data_locks.get("lock_requests/sec")
        batches = data_stats.get("batch_requests/sec")
        if locks is None or batches is None:
            continue

        # Use the timestamp from the section if present, else the local clock.
        now = data_locks.get('utc_time', data_stats.get('utc_time'))
        if now is None:
            now = time.time()

        lock_rate = get_rate("mssql_counters_locks_per_batch.%s.%s.locks" % (node_name, item), now,
                             locks)
        batch_rate = get_rate("mssql_counters_locks_per_batch.%s.%s.batches" % (node_name, item),
                              now, batches)

        if batch_rate == 0:
            lock_per_batch = 0
        else:
            lock_per_batch = lock_rate / batch_rate

        node_info = ""
        if node_name:
            node_info = "[%s] " % node_name
        infotext = "%s%.1f" % (node_info, lock_per_batch)
        state = 0

        warn, crit = params.get('locks_per_batch', (None, None))
        if crit is not None and lock_per_batch >= crit:
            state = 2
        elif warn is not None and lock_per_batch >= warn:
            state = 1

        if state:
            infotext += " (warn/crit at %.1f/%.1f per second)" % (warn, crit)

        yield state, infotext, [("locks_per_batch", lock_per_batch, warn, crit)]
# Locks per batch: uses its own discovery function, which needs counters from
# both the Locks and the SQL_Statistics objects.
check_info["mssql_counters.locks_per_batch"] = {
    "inventory_function": inventory_mssql_counters_locks_per_batch,
    "check_function": check_mssql_counters_locks_per_batch,
    "service_description": "MSSQL %s Locks per Batch",
    "has_perfdata": True,
    "group": "mssql_stats",
    'node_info': True,
}
513 # .--File Sizes----------------------------------------------------------.
514 # | _____ _ _ ____ _ |
515 # | | ___(_) | ___ / ___|(_)_______ ___ |
516 # | | |_ | | |/ _ \ \___ \| |_ / _ \/ __| |
517 # | | _| | | | __/ ___) | |/ / __/\__ \ |
518 # | |_| |_|_|\___| |____/|_/___\___||___/ |
519 # | |
520 # '----------------------------------------------------------------------'
def check_mssql_file_sizes(item, params, parsed):
    """Report data file, log file and used log file sizes of one item.

    item:   "<obj> <instance>".
    params: None/empty or a dictionary that may hold upper (warn, crit)
            pairs under 'data_files', 'log_files' and 'log_files_used'.
            For 'log_files_used' a float crit selects a comparison against
            the used percentage, an int warn a comparison against the size.

    All '(kb)' counters are converted to bytes before they are rendered or
    compared. Yields (state, infotext, perfdata) for the two total sizes and
    (state, infotext) for the used log size; counters a node does not report
    are skipped. Raises MKCounterWrapped if no node delivered any counter.
    """
    obj, instance = item.split()
    node_data = parsed.get((obj, instance), {})

    if not any(node_data.values()):
        # Assume general connection problem to the database, which is reported
        # by the "X Instance" service and skip this check.
        raise MKCounterWrapped("Failed to connect to database")

    if not params:
        params = {}

    for node_name, counters in node_data.items():
        # The node prefix is shown on the first result of a node only.
        node_info = ""
        if node_name:
            node_info = "[%s] " % node_name

        log_files_size = counters.get("log_file(s)_size_(kb)")
        for val_kb, key, title in [
            (counters.get("data_file(s)_size_(kb)"), "data_files", "Data files"),
            (log_files_size, "log_files", "Log files total"),
        ]:
            if val_kb is None:
                continue

            val_bytes = val_kb * 1024
            infotext = "%s%s: %s" % (node_info, title, get_bytes_human_readable(val_bytes))
            node_info = ""

            state = 0
            warn, crit = params.get(key, (None, None))
            if crit is not None and val_bytes >= crit:
                state = 2
            elif warn is not None and val_bytes >= warn:
                state = 1
            if state:
                infotext += " (warn/crit at %s/%s)" % (get_bytes_human_readable(warn),
                                                       get_bytes_human_readable(crit))

            yield state, infotext, [(key, val_bytes, warn, crit)]

        log_files_used = counters.get("log_file(s)_used_size_(kb)")
        if log_files_used is None:
            # Nothing to render or compare for this node.
            continue

        # The counter is in KB like the two above. Convert it so that the
        # rendered size and the comparison against byte levels are correct.
        log_files_used = log_files_used * 1024
        infotext = "Log files used: %s" % get_bytes_human_readable(log_files_used)
        try:
            log_files_used_perc = 100.0 * log_files_used / (log_files_size * 1024)
            infotext += ", %s" % get_percent_human_readable(log_files_used_perc)
        except (TypeError, ZeroDivisionError):
            # Total log size missing or zero: no percentage available.
            log_files_used_perc = None

        warn, crit = params.get("log_files_used", (None, None))
        if isinstance(crit, float) and log_files_used_perc is not None:
            log_files_used_value = log_files_used_perc
            readable_f = get_percent_human_readable
        elif isinstance(warn, int):
            log_files_used_value = log_files_used
            readable_f = get_bytes_human_readable
        else:
            yield 0, infotext
            continue

        state = 0
        if crit is not None and log_files_used_value >= crit:
            state = 2
        elif warn is not None and log_files_used_value >= warn:
            state = 1
        if state:
            infotext += " (warn/crit at %s/%s)" % (readable_f(warn), readable_f(crit))
        yield state, infotext
# File sizes: discovered for every (object, instance) that reports at least
# one of the listed size counters, with an empty parameter dictionary as
# default.
check_info['mssql_counters.file_sizes'] = {
    'inventory_function': lambda parsed: inventory_mssql_counters_generic(
        parsed, ['data_file(s)_size_(kb)', 'log_file(s)_size_(kb)', 'log_file(s)_used_size_(kb)'],
        dflt={}),
    'check_function': check_mssql_file_sizes,
    'service_description': "MSSQL %s File Sizes",
    'has_perfdata': True,
    'group': "mssql_file_sizes",
    'node_info': True,
}