Cleanup config.nodes_of
[check_mk.git] / checks / aws_rds
blob364437379b6c54bcae0f908f9c48dd8296052db4
1 #!/usr/bin/python
2 # -*- encoding: utf-8; py-indent-offset: 4 -*-
3 # +------------------------------------------------------------------+
4 # | ____ _ _ __ __ _ __ |
5 # | / ___| |__ ___ ___| | __ | \/ | |/ / |
6 # | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
7 # | | |___| | | | __/ (__| < | | | | . \ |
8 # | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
9 # | |
10 # | Copyright Mathias Kettner 2019 mk@mathias-kettner.de |
11 # +------------------------------------------------------------------+
13 # This file is part of Check_MK.
14 # The official homepage is at http://mathias-kettner.de/check_mk.
16 # check_mk is free software; you can redistribute it and/or modify it
17 # under the terms of the GNU General Public License as published by
18 # the Free Software Foundation in version 2. check_mk is distributed
19 # in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
20 # out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
21 # PARTICULAR PURPOSE. See the GNU General Public License for more de-
22 # tails. You should have received a copy of the GNU General Public
23 # License along with GNU Make; see the file COPYING. If not, write
24 # to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
25 # Boston, MA 02110-1301 USA.
def parse_aws_rds(info):
    """Parse the agent section into a dict of RDS metrics per DB instance.

    The raw section is decoded with parse_aws() and handed to
    _extract_aws_metrics_by_labels() together with the wanted metric labels;
    'DBInstanceIdentifier' is requested as an extra key. A few metrics are
    scaled by a fixed factor, then each metrics dict is stored under its
    'DBInstanceIdentifier'. If an identifier shows up more than once, the
    first metrics dict is kept.
    """
    # Not requested here (disabled in the original list): FreeableMemory,
    # SwapUsage, FreeStorageSpace, MaximumUsedTransactionIDs.
    wanted_labels = [
        "CPUUtilization",
        "CPUCreditUsage",
        "CPUCreditBalance",
        "DatabaseConnections",
        "FailedSQLServerAgentJobsCount",
        "BinLogDiskUsage",
        "OldestReplicationSlotLag",
        "ReplicaLag",
        "ReplicationSlotDiskUsage",
        "TransactionLogsDiskUsage",
        "TransactionLogsGeneration",
        "NetworkReceiveThroughput",
        "NetworkTransmitThroughput",
        "DiskQueueDepth",
        "WriteIOPS",
        "WriteLatency",
        "WriteThroughput",
        "ReadIOPS",
        "ReadLatency",
        "ReadThroughput",
        "BurstBalance",
        "AllocatedStorage",
    ]

    # NOTE(review): these factors presumably convert GiB-based values to
    # bytes; 1.074e+9 is only an approximation of 1024**3 - confirm whether
    # the difference is intended.
    scale_factors = (
        ('AllocatedStorage', 1.074e+9),
        ('TransactionLogsDiskUsage', 1024**3),
        ('ReplicationSlotDiskUsage', 1024**3),
        ('OldestReplicationSlotLag', 1024**3),
    )

    extracted = _extract_aws_metrics_by_labels(
        wanted_labels, parse_aws(info), extra_keys=['DBInstanceIdentifier'])

    instances = {}
    for instance_metrics in extracted.itervalues():
        for label, factor in scale_factors:
            try:
                instance_metrics[label] *= factor
            except KeyError:
                # Metric not delivered for this instance: nothing to scale.
                continue
        instances.setdefault(instance_metrics['DBInstanceIdentifier'], instance_metrics)
    return instances
76 # .--CPU utilization-----------------------------------------------------.
77 # | ____ ____ _ _ _ _ _ _ _ _ |
78 # | / ___| _ \| | | | _ _| |_(_) (_)______ _| |_(_) ___ _ __ |
79 # | | | | |_) | | | | | | | | __| | | |_ / _` | __| |/ _ \| '_ \ |
80 # | | |___| __/| |_| | | |_| | |_| | | |/ / (_| | |_| | (_) | | | | |
81 # | \____|_| \___/ \__,_|\__|_|_|_/___\__,_|\__|_|\___/|_| |_| |
82 # | |
83 # '----------------------------------------------------------------------'
# Default parameters of the CPU utilization check; referenced through
# 'default_levels_variable' in check_info['aws_rds'].
# NOTE(review): presumably (warn, crit) in percent as consumed by
# check_cpu_util - confirm against cpu_util.include.
factory_settings['aws_rds_cpu_util'] = dict(levels=(80.0, 90.0))
@get_parsed_item_data
def check_aws_rds(item, params, metrics):
    """Evaluate the 'CPUUtilization' metric of one instance.

    The value, the check parameters and the current time are passed on to
    check_cpu_util(), whose result is returned unchanged.
    """
    cpu_utilization = metrics['CPUUtilization']
    now = time.time()
    return check_cpu_util(cpu_utilization, params, now)
# Main check: registers the section parser and the CPU utilization service.
# Instances are discovered when they deliver 'CPUUtilization'.
check_info['aws_rds'] = {
    'parse_function': parse_aws_rds,
    'inventory_function': lambda parsed: inventory_aws_generic(parsed, ['CPUUtilization']),
    'check_function': check_aws_rds,
    'service_description': 'AWS/RDS %s CPU Utilization',
    'includes': ['cpu_util.include', 'aws.include'],
    'group': 'cpu_utilization_multiitem',
    'default_levels_variable': 'aws_rds_cpu_util',
    'has_perfdata': True,
}
108 # .--CPU credits---------------------------------------------------------.
109 # | ____ ____ _ _ _ _ _ |
110 # | / ___| _ \| | | | ___ _ __ ___ __| (_) |_ ___ |
111 # | | | | |_) | | | | / __| '__/ _ \/ _` | | __/ __| |
112 # | | |___| __/| |_| | | (__| | | __/ (_| | | |_\__ \ |
113 # | \____|_| \___/ \___|_| \___|\__,_|_|\__|___/ |
114 # | |
115 # '----------------------------------------------------------------------'
117 # CPU credit balance:
118 # For standard T2 instances with bursting, a burst can continue only as long as
119 # there are available CPU credits, so it’s important to monitor your instance’s
120 # balance. Credits are earned any time the instance is running below its baseline
121 # CPU performance level. The initial balance, accrual rate, and maximum possible
122 # balance are all dependent on the instance level.
124 # CPU credit usage:
125 # One CPU credit is equivalent to one minute of 100 percent CPU utilization (or
126 # two minutes at 50 percent, etc.). Whenever an instance requires CPU performance
127 # above that instance type’s baseline, it will burst, consuming CPU credits until
128 # the demand lessens or the credit balance runs out. Keeping an eye on your
129 # instances’ credit usage will help you identify whether you might need to switch
130 # to an instance type that is optimized for CPU-intensive workloads. Or, you can
131 # create an alert for when your credit balance drops below a threshold while CPU
132 # usage remains above baseline.
@get_parsed_item_data
def check_aws_rds_cpu_credits(item, params, metrics):
    """Report CPU credit usage and balance, plus the burst balance if present.

    Yields:
      - an always-OK result with the 'CPUCreditUsage' value,
      - the check_levels() result for 'CPUCreditBalance' using the levels
        from params["balance_levels_lower"],
      - if the 'BurstBalance' metric is available, the check_levels() result
        for it using the levels from params["burst_balance_levels_lower"].

    The levels are passed in the third and fourth slot of the levels tuple.
    NOTE(review): presumably these are the lower warn/crit slots of
    check_levels - confirm.
    """
    yield 0, "Usage: %.2f" % metrics['CPUCreditUsage']

    warn, crit = params.get("balance_levels_lower", (None, None))
    yield check_levels(
        metrics['CPUCreditBalance'],
        "aws_cpu_credit_balance", (None, None, warn, crit),
        human_readable_func=lambda x: "%.2f" % x,
        infoname='Balance')

    burst_balance = metrics.get('BurstBalance')
    if burst_balance is not None:
        warn, crit = params.get("burst_balance_levels_lower", (None, None))
        # Use a distinct label: the CPU credit balance above is already
        # reported as 'Balance', two identical labels would be ambiguous.
        yield check_levels(
            burst_balance,
            "aws_burst_balance", (None, None, warn, crit),
            human_readable_func=get_percent_human_readable,
            infoname='Burst balance')
# CPU credits service: discovered for instances that deliver both
# 'CPUCreditUsage' and 'CPUCreditBalance'.
check_info['aws_rds.cpu_credits'] = {
    'inventory_function': lambda parsed: inventory_aws_generic(
        parsed, ['CPUCreditUsage', 'CPUCreditBalance']),
    'check_function': check_aws_rds_cpu_credits,
    'service_description': 'AWS/RDS %s CPU Credits',
    'includes': ['aws.include'],
    'group': 'aws_rds_cpu_credits',
    'has_perfdata': True,
}
166 # .--network IO----------------------------------------------------------.
167 # | _ _ ___ ___ |
168 # | _ __ ___| |___ _____ _ __| | __ |_ _/ _ \ |
169 # | | '_ \ / _ \ __\ \ /\ / / _ \| '__| |/ / | | | | | |
170 # | | | | | __/ |_ \ V V / (_) | | | < | | |_| | |
171 # | |_| |_|\___|\__| \_/\_/ \___/|_| |_|\_\ |___\___/ |
172 # | |
173 # '----------------------------------------------------------------------'
@get_parsed_item_data
def check_aws_rds_network_io(item, params, metrics):
    """Report the network throughput of one instance via the interface check.

    A single pseudo interface row is built from 'NetworkReceiveThroughput'
    and 'NetworkTransmitThroughput' and handed to check_if_common_single().
    The instance identifier (falling back to the item) is placed near the
    end of the row.
    """
    # NOTE(review): the row presumably follows the column layout expected by
    # if.include. The empty placeholder columns are reconstructed - verify
    # their number and position against that include file.
    unused = ""
    received = metrics['NetworkReceiveThroughput']
    transmitted = metrics['NetworkTransmitThroughput']
    alias = metrics.get('DBInstanceIdentifier', item)

    row = ["0", item, "1", unused, "1", received]
    row += [unused] * 5
    row.append(transmitted)
    row += [unused] * 6
    row += [alias, unused]

    return check_if_common_single(item, params, [row])
# Network IO service: discovered for instances that deliver both throughput
# metrics; uses the parameters and rule group of the interface checks.
check_info['aws_rds.network_io'] = {
    'inventory_function': lambda parsed: inventory_aws_generic(
        parsed, ['NetworkReceiveThroughput', 'NetworkTransmitThroughput']),
    'check_function': check_aws_rds_network_io,
    'service_description': 'AWS/RDS %s Network IO',
    'includes': ['aws.include', "if.include"],
    'default_levels_variable': "if_default_levels",
    'group': 'if',
    'has_perfdata': True,
}
215 # .--bin log usage-------------------------------------------------------.
216 # | _ _ _ |
217 # | | |__ (_)_ __ | | ___ __ _ _ _ ___ __ _ __ _ ___ |
218 # | | '_ \| | '_ \ | |/ _ \ / _` | | | | / __|/ _` |/ _` |/ _ \ |
219 # | | |_) | | | | | | | (_) | (_| | | |_| \__ \ (_| | (_| | __/ |
220 # | |_.__/|_|_| |_| |_|\___/ \__, | \__,_|___/\__,_|\__, |\___| |
221 # | |___/ |___/ |
222 # '----------------------------------------------------------------------'
@get_parsed_item_data
def check_aws_rds_bin_log_usage(item, params, metrics):
    """Report the binary log disk usage of one instance.

    Yields an always-OK result with the absolute 'BinLogDiskUsage' value,
    followed by the usage relative to 'AllocatedStorage' checked against
    params['levels']. If the relative usage cannot be computed (metric
    missing or zero allocated storage) a WARN result is yielded instead.
    """
    used = metrics['BinLogDiskUsage']
    yield 0, get_bytes_human_readable(used)

    try:
        used_percent = 100.0 * used / metrics['AllocatedStorage']
    except (KeyError, ZeroDivisionError):
        yield 1, 'Cannot calculate usage'
        return

    yield check_levels(
        used_percent,
        "aws_rds_bin_log_disk_usage",
        params.get('levels', (None, None)),
        human_readable_func=get_percent_human_readable)
# Binary log usage service: discovered for instances that deliver
# 'BinLogDiskUsage' and 'AllocatedStorage'.
check_info['aws_rds.bin_log_usage'] = {
    'inventory_function': lambda parsed: inventory_aws_generic(
        parsed, ['BinLogDiskUsage', 'AllocatedStorage']),
    'check_function': check_aws_rds_bin_log_usage,
    'service_description': 'AWS/RDS %s Binary Log Usage',
    'includes': ['aws.include'],
    'has_perfdata': True,
    'group': 'aws_rds_disk_usage',
}
253 # .--transaction logs usage----------------------------------------------.
254 # | _ _ _ |
255 # | | |_ _ __ __ _ _ __ ___ __ _ ___| |_(_) ___ _ __ |
256 # | | __| '__/ _` | '_ \/ __|/ _` |/ __| __| |/ _ \| '_ \ |
257 # | | |_| | | (_| | | | \__ \ (_| | (__| |_| | (_) | | | | |
258 # | \__|_| \__,_|_| |_|___/\__,_|\___|\__|_|\___/|_| |_| |
259 # | |
260 # | _ |
261 # | | | ___ __ _ ___ _ _ ___ __ _ __ _ ___ |
262 # | | |/ _ \ / _` / __| | | | / __|/ _` |/ _` |/ _ \ |
263 # | | | (_) | (_| \__ \ | |_| \__ \ (_| | (_| | __/ |
264 # | |_|\___/ \__, |___/ \__,_|___/\__,_|\__, |\___| |
265 # | |___/ |___/ |
266 # '----------------------------------------------------------------------'
@get_parsed_item_data
def check_aws_rds_transaction_logs_usage(item, params, metrics):
    """Report the transaction log disk usage of one instance.

    Yields an always-OK result with the absolute 'TransactionLogsDiskUsage'
    value, followed by the usage relative to 'AllocatedStorage' checked
    against params['levels'] (or a WARN result if the relative usage cannot
    be computed). If the 'TransactionLogsGeneration' metric is available, an
    additional always-OK result with its value is yielded.
    """
    transaction_logs_space = metrics['TransactionLogsDiskUsage']
    yield 0, get_bytes_human_readable(transaction_logs_space)

    try:
        usage = 100.0 * transaction_logs_space / metrics['AllocatedStorage']
    except (KeyError, ZeroDivisionError):
        yield 1, 'Cannot calculate usage'
    else:
        yield check_levels(
            usage,
            "aws_rds_transaction_logs_disk_usage",
            params.get('levels', (None, None)),
            human_readable_func=get_percent_human_readable)

    generation = metrics.get('TransactionLogsGeneration')
    # Test against None instead of truthiness: a generation rate of 0 is a
    # valid measurement and must not be dropped from the output.
    if generation is not None:
        yield 0, 'Size of transaction logs: %s/s' % generation
# Transaction logs usage service: discovered for instances that deliver
# 'TransactionLogsDiskUsage' and 'AllocatedStorage'.
check_info['aws_rds.transaction_logs_usage'] = {
    'inventory_function': lambda parsed: inventory_aws_generic(
        parsed, ['TransactionLogsDiskUsage', 'AllocatedStorage']),
    'check_function': check_aws_rds_transaction_logs_usage,
    'service_description': 'AWS/RDS %s Transaction Logs Usage',
    'includes': ['aws.include'],
    'has_perfdata': True,
    'group': 'aws_rds_disk_usage',
}
301 # .--replication slot usage----------------------------------------------.
302 # | _ _ _ _ _ _ |
303 # | _ __ ___ _ __ | (_) ___ __ _| |_(_) ___ _ __ ___| | ___ | |_ |
304 # | | '__/ _ \ '_ \| | |/ __/ _` | __| |/ _ \| '_ \ / __| |/ _ \| __| |
305 # | | | | __/ |_) | | | (_| (_| | |_| | (_) | | | | \__ \ | (_) | |_ |
306 # | |_| \___| .__/|_|_|\___\__,_|\__|_|\___/|_| |_| |___/_|\___/ \__| |
307 # | |_| |
308 # | |
309 # | _ _ ___ __ _ __ _ ___ |
310 # | | | | / __|/ _` |/ _` |/ _ \ |
311 # | | |_| \__ \ (_| | (_| | __/ |
312 # | \__,_|___/\__,_|\__, |\___| |
313 # | |___/ |
314 # '----------------------------------------------------------------------'
@get_parsed_item_data
def check_aws_rds_replication_slot_usage(item, params, metrics):
    """Report the replication slot disk usage of one instance.

    Yields an always-OK result with the absolute 'ReplicationSlotDiskUsage'
    value, followed by the usage relative to 'AllocatedStorage' checked
    against params['levels']. If the relative usage cannot be computed
    (metric missing or zero allocated storage) a WARN result is yielded
    instead.
    """
    used = metrics['ReplicationSlotDiskUsage']
    yield 0, get_bytes_human_readable(used)

    try:
        used_percent = 100.0 * used / metrics['AllocatedStorage']
    except (KeyError, ZeroDivisionError):
        yield 1, 'Cannot calculate usage'
        return

    yield check_levels(
        used_percent,
        "aws_rds_replication_slot_disk_usage",
        params.get('levels', (None, None)),
        human_readable_func=get_percent_human_readable)
# Replication slot usage service: discovered for instances that deliver
# 'ReplicationSlotDiskUsage' and 'AllocatedStorage'.
check_info['aws_rds.replication_slot_usage'] = {
    'inventory_function': lambda parsed: inventory_aws_generic(
        parsed, ['ReplicationSlotDiskUsage', 'AllocatedStorage']),
    'check_function': check_aws_rds_replication_slot_usage,
    'service_description': 'AWS/RDS %s Replication Slot Usage',
    'includes': ['aws.include'],
    'has_perfdata': True,
    'group': 'aws_rds_disk_usage',
}
345 # .--disk IO-------------------------------------------------------------.
346 # | _ _ _ ___ ___ |
347 # | __| (_)___| | __ |_ _/ _ \ |
348 # | / _` | / __| |/ / | | | | | |
349 # | | (_| | \__ \ < | | |_| | |
350 # | \__,_|_|___/_|\_\ |___\___/ |
351 # | |
352 # '----------------------------------------------------------------------'
def check_aws_rds_disk_io(item, params, parsed):
    """Report disk IO of an instance via the generic diskstat check.

    For every instance in `parsed` a diskstat-style dict is built: IOPS and
    throughput go through get_rate(), the latencies are multiplied by 1000.
    The complete mapping is handed to check_diskstat_dict() together with the
    requested item.

    The get_rate() keys are suffixed with the name of the disk being
    processed. Suffixing them with `item` (as done previously) made all
    instances share the same four keys within one check run.
    NOTE(review): presumably get_rate() keeps per-key state between runs, so
    shared keys would mix up the values of different instances - confirm.
    For the disk that equals `item` the keys are unchanged.

    Raises KeyError if an instance lacks one of the required metrics.
    """
    now = time.time()
    disks = {}
    for disk_name, metrics in parsed.iteritems():
        disks[disk_name] = {
            "read_ios": get_rate("aws_rds_disk_io_read_ios.%s" % disk_name, now,
                                 metrics['ReadIOPS']),
            "write_ios": get_rate("aws_rds_disk_io_write_ios.%s" % disk_name, now,
                                  metrics['WriteIOPS']),
            "read_throughput": get_rate("aws_rds_disk_io_read_throughput.%s" % disk_name, now,
                                        metrics['ReadThroughput']),
            "write_throughput": get_rate("aws_rds_disk_io_write_throughput.%s" % disk_name, now,
                                         metrics['WriteThroughput']),
            "read_latency": metrics['ReadLatency'] * 1000.0,
            "write_latency": metrics['WriteLatency'] * 1000.0,
        }
    return check_diskstat_dict(item, params, disks)
# Disk IO service: discovered for instances that deliver the queue depth as
# well as all read/write IOPS, latency and throughput metrics.
check_info['aws_rds.disk_io'] = {
    'inventory_function': lambda parsed: inventory_aws_generic(parsed, [
        'DiskQueueDepth',
        'ReadIOPS',
        'ReadLatency',
        'ReadThroughput',
        'WriteIOPS',
        'WriteLatency',
        'WriteThroughput',
    ]),
    'check_function': check_aws_rds_disk_io,
    'service_description': 'AWS/RDS %s Disk IO',
    'includes': ['aws.include', 'diskstat.include'],
    'group': 'diskstat',
    'has_perfdata': True,
}
386 # .--connections---------------------------------------------------------.
387 # | _ _ |
388 # | ___ ___ _ __ _ __ ___ ___| |_(_) ___ _ __ ___ |
389 # | / __/ _ \| '_ \| '_ \ / _ \/ __| __| |/ _ \| '_ \/ __| |
390 # | | (_| (_) | | | | | | | __/ (__| |_| | (_) | | | \__ \ |
391 # | \___\___/|_| |_|_| |_|\___|\___|\__|_|\___/|_| |_|___/ |
392 # | |
393 # '----------------------------------------------------------------------'
@get_parsed_item_data
def check_aws_rds_connections(item, params, metrics):
    """Check the 'DatabaseConnections' metric against params['levels'].

    Yields the single result produced by check_levels(), labelled "In use".
    """
    connections = metrics['DatabaseConnections']
    levels = params.get('levels', (None, None))
    yield check_levels(connections, "aws_rds_connections", levels, infoname="In use")
# Connections service: discovered for instances that deliver
# 'DatabaseConnections'.
check_info['aws_rds.connections'] = {
    'inventory_function': lambda parsed: inventory_aws_generic(parsed, ['DatabaseConnections']),
    'check_function': check_aws_rds_connections,
    'service_description': 'AWS/RDS %s Connections',
    'includes': ['aws.include'],
    'has_perfdata': True,
    'group': 'aws_rds_connections',
}
416 # .--agent jobs----------------------------------------------------------.
417 # | _ _ _ |
418 # | __ _ __ _ ___ _ __ | |_ (_) ___ | |__ ___ |
419 # | / _` |/ _` |/ _ \ '_ \| __| | |/ _ \| '_ \/ __| |
420 # | | (_| | (_| | __/ | | | |_ | | (_) | |_) \__ \ |
421 # | \__,_|\__, |\___|_| |_|\__| _/ |\___/|_.__/|___/ |
422 # | |___/ |__/ |
423 # '----------------------------------------------------------------------'
@get_parsed_item_data
def check_aws_rds_agent_jobs(item, params, metrics):
    """Report the 'FailedSQLServerAgentJobsCount' metric of one instance.

    Yields WARN if the count is greater than zero, OK otherwise.
    """
    failed_jobs = metrics['FailedSQLServerAgentJobsCount']
    state = 1 if failed_jobs > 0 else 0
    yield state, "Failed jobs during the last minute: %s" % failed_jobs
# SQL Server agent jobs service: discovered for instances that deliver
# 'FailedSQLServerAgentJobsCount'.
check_info['aws_rds.agent_jobs'] = {
    'inventory_function': lambda parsed: inventory_aws_generic(
        parsed, ['FailedSQLServerAgentJobsCount']),
    'check_function': check_aws_rds_agent_jobs,
    'service_description': 'AWS/RDS %s SQL Server Agent Jobs',
    'includes': ['aws.include'],
}
445 # .--replica lag---------------------------------------------------------.
446 # | _ _ _ |
447 # | _ __ ___ _ __ | (_) ___ __ _ | | __ _ __ _ |
448 # | | '__/ _ \ '_ \| | |/ __/ _` | | |/ _` |/ _` | |
449 # | | | | __/ |_) | | | (_| (_| | | | (_| | (_| | |
450 # | |_| \___| .__/|_|_|\___\__,_| |_|\__,_|\__, | |
451 # | |_| |___/ |
452 # '----------------------------------------------------------------------'
@get_parsed_item_data
def check_aws_rds_replica_lag(item, params, metrics):
    """Check the replica lag and, if present, the oldest replication slot lag.

    Yields the check_levels() result for 'ReplicaLag' against
    params['lag_levels'] (rendered as an age). If the
    'OldestReplicationSlotLag' metric is available, a second result is
    yielded for it against params['slot_levels'] (rendered as bytes).
    """
    no_levels = (None, None)

    yield check_levels(
        metrics['ReplicaLag'],
        "aws_rds_replica_lag",
        params.get('lag_levels', no_levels),
        human_readable_func=get_age_human_readable,
        infoname="Lag")

    slot_lag = metrics.get('OldestReplicationSlotLag')
    if slot_lag is None:
        return

    yield check_levels(
        slot_lag,
        "aws_rds_oldest_replication_slot_lag",
        params.get('slot_levels', no_levels),
        human_readable_func=get_bytes_human_readable,
        infoname="Oldest replication slot lag")
474 check_info['aws_rds.replica_lag'] = {
475 'inventory_function': lambda p:\
476 inventory_aws_generic(p, ['ReplicaLag']),
477 'check_function': check_aws_rds_replica_lag,
478 'service_description': 'AWS/RDS %s Replica Lag',
479 'includes': ['aws.include'],
480 'has_perfdata': True,
481 'group': 'aws_rds_replica_lag',