2 # -*- encoding: utf-8; py-indent-offset: 4 -*-
3 # +------------------------------------------------------------------+
4 # | ____ _ _ __ __ _ __ |
5 # | / ___| |__ ___ ___| | __ | \/ | |/ / |
6 # | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
7 # | | |___| | | | __/ (__| < | | | | . \ |
8 # | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
10 # | Copyright Mathias Kettner 2014 mk@mathias-kettner.de |
11 # +------------------------------------------------------------------+
13 # This file is part of Check_MK.
14 # The official homepage is at http://mathias-kettner.de/check_mk.
16 # check_mk is free software; you can redistribute it and/or modify it
17 # under the terms of the GNU General Public License as published by
18 # the Free Software Foundation in version 2. check_mk is distributed
19 # in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
20 # out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
21 # PARTICULAR PURPOSE. See the GNU General Public License for more de-
22 # tails. You should have received a copy of the GNU General Public
23 # License along with GNU Make; see the file COPYING. If not, write
24 # to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
25 # Boston, MA 02110-1301 USA.
27 # .--mem.linux-----------------------------------------------------------.
29 # | _ __ ___ ___ _ __ ___ | (_)_ __ _ ___ __ |
30 # | | '_ ` _ \ / _ \ '_ ` _ \ | | | '_ \| | | \ \/ / |
31 # | | | | | | | __/ | | | | |_| | | | | | |_| |> < |
32 # | |_| |_| |_|\___|_| |_| |_(_)_|_|_| |_|\__,_/_/\_\ |
34 # +----------------------------------------------------------------------+
35 # | Specialized memory check for Linux that takes into account |
36 # | all of its specific information in /proc/meminfo. |
37 # '----------------------------------------------------------------------'
# BEWARE: half of the information and blog entries about /proc/meminfo
# on the internet are imprecise or even totally wrong!
43 # MemTotal: 24707592 kB
49 # Inactive: 13360444 kB
50 # Active(anon): 1481236 kB
51 # Inactive(anon): 371260 kB
52 # Active(file): 7275640 kB
53 # Inactive(file): 12989184 kB
54 # Unevictable: 964808 kB
56 # SwapTotal: 16777212 kB
57 # SwapFree: 16703328 kB
60 # AnonPages: 2774444 kB
64 # SReclaimable: 756236 kB
65 # SUnreclaim: 104792 kB
66 # KernelStack: 4176 kB
67 # PageTables: 15892 kB
71 # CommitLimit: 39014044 kB
72 # Committed_AS: 3539808 kB
73 # VmallocTotal: 34359738367 kB
74 # VmallocUsed: 347904 kB
75 # VmallocChunk: 34346795572 kB
76 # HardwareCorrupted: 0 kB
82 # Hugepagesize: 2048 kB
83 # DirectMap4k: 268288 kB
84 # DirectMap2M: 8112128 kB
85 # DirectMap1G: 16777216 kB
87 # This is from an earlier kernel (CentOS 5.5). Some entries
101 # SwapTotal: 2064376 kB
102 # SwapFree: 2062756 kB
105 # AnonPages: 43080 kB
108 # PageTables: 3208 kB
111 # CommitLimit: 2252964 kB
112 # Committed_AS: 125968 kB
113 # VmallocTotal: 34359738367 kB
114 # VmallocUsed: 18112 kB
115 # VmallocChunk: 34359719415 kB
119 # Hugepagesize: 2048 kB
121 # Yet earlier kernel (SLES 9):
123 # MemTotal: 6224268 kB
124 # MemFree: 2913660 kB
129 # Inactive: 1276156 kB
130 # HighTotal: 5373824 kB
131 # HighFree: 2233984 kB
132 # LowTotal: 850444 kB
134 # SwapTotal: 1052280 kB
135 # SwapFree: 1052280 kB
140 # Committed_AS: 2758332 kB
141 # PageTables: 7672 kB
142 # VmallocTotal: 112632 kB
143 # VmallocUsed: 9324 kB
144 # VmallocChunk: 103180 kB
147 # Hugepagesize: 2048 kB
# Factory defaults for the parameters of the "mem.linux" check.
#
# Every entry maps a parameter name to a pair (kind, (warn, crit)). The kind
# ("perc_used", "perc_free", "abs_used" or "abs_free") tells
# check_mem_levels() how the measured value is compared with the two
# thresholds. Percentage kinds carry percent values; the absolute kinds are
# presumably given in bytes (they are rendered with
# get_bytes_human_readable()) -- verify against the WATO rule.
factory_settings["mem_linux_default_levels"] = {
    "levels_virtual": ("perc_used", (80.0, 90.0)),
    # Values above 100% are intentional here: the percentage is taken in
    # relation to the RAM size alone, while the value includes swap.
    "levels_total": ("perc_used", (120.0, 150.0)),
    "levels_shm": ("perc_used", (20.0, 30.0)),
    "levels_pagetables": ("perc_used", (8.0, 16.0)),
    "levels_committed": ("perc_used", (100.0, 150.0)),
    "levels_commitlimit": ("perc_free", (20.0, 10.0)),
    "levels_vmalloc": ("abs_free", (50 * 1024 * 1024, 30 * 1024 * 1024)),
    # warn and crit coincide, so there is no separate warning range.
    "levels_hardwarecorrupted": ("abs_used", (1, 1)),
}
def is_linux_meminfo(meminfo):
    """Tell whether a parsed meminfo section looks like Linux /proc/meminfo.

    The inventory functions in this file use the result to decide which of
    the memory checks is responsible for a host.

    Args:
        meminfo: Container of meminfo field names (normally the dict produced
            by one of the parse functions); only membership is tested.

    Returns:
        True if "PageTables", "Writeback" and "Committed_AS" are all present,
        False otherwise.
    """
    required_keys = ("PageTables", "Writeback", "Committed_AS")
    return all(key in meminfo for key in required_keys)
165 def inventory_mem_linux(info
):
166 meminfo
= parse_proc_meminfo_bytes(info
)
167 if is_linux_meminfo(meminfo
):
171 def check_mem_levels(title
,
177 show_percentage
=False,
180 of_value
= total
# Reference for percentage levels
187 infotext
= "%s: %s" % (title
, get_bytes_human_readable(value
))
189 infotext
= "%s used: %s of %s" % (title
, get_bytes_human_readable(used
),
190 get_bytes_human_readable(total
))
193 if levels
and levels
!= "ignore":
195 if how
== "predictive":
196 return 3, "Predictive levels for memory check not yet implemented"
# TODO: We need the name of the RRD variable here! That does not work,
# however, if several values have been added up. We should allow
# predictive levels only for selected values.
200 # return check_levels(used, levels[1], unit = "GB")
202 warn
, crit
= levels
[1]
203 if how
.startswith("perc_"):
204 perc_used
= 100.0 * float(used
) / of_value
205 perc_free
= 100 - perc_used
206 if how
== "perc_used":
211 levels_text
= " (%.1f%%%s, " % (perc_used
, t
)
212 if perc_used
>= crit
:
214 elif perc_used
>= warn
:
217 elif how
== "perc_free":
222 levels_text
= " (%.1f%% %s, " % (perc_free
, t
)
225 elif perc_free
< warn
:
230 infotext
+= levels_text
+ "warn/crit at %.1f%%/%.1f%%)" % (warn
, crit
)
233 if how
== "abs_used":
246 infotext
+= " (warn/crit at %s/%s)" % (get_bytes_human_readable(warn
),
247 get_bytes_human_readable(crit
))
249 if not perc_shown
and show_percentage
:
250 infotext
+= " (%.1f%%)" % (100.0 * float(used
) / of_value
)
251 return state
, infotext
254 def check_mem_linux(_no_item
, params
, info
):
255 meminfo
= parse_proc_meminfo_bytes(info
)
257 yield 3, "Data not found in agent output"
260 # SReclaimable is not available for older kernels
261 if "SReclaimable" not in meminfo
:
262 meminfo
["SReclaimable"] = 0
263 meminfo
["SUnreclaim"] = meminfo
["Slab"]
265 # Compute memory used by caches, that can be considered "free"
266 meminfo
["Caches"] = meminfo
["Cached"] + meminfo
["Buffers"] \
267 + meminfo
["SwapCached"] + meminfo
["SReclaimable"]
270 meminfo
["MemUsed"] = meminfo
["MemTotal"] - meminfo
["MemFree"] - meminfo
["Caches"]
271 yield check_mem_levels(
275 params
.get("levels_ram"),
276 show_percentage
=not meminfo
["SwapTotal"])
278 # Swap - but only if available
279 meminfo
["SwapUsed"] = meminfo
["SwapTotal"] - meminfo
["SwapFree"]
280 if meminfo
["SwapTotal"]:
281 yield check_mem_levels("Swap", meminfo
["SwapUsed"], meminfo
["SwapTotal"],
282 params
.get("levels_swap"))
284 # Total virtual memory
285 meminfo
["TotalTotal"] = meminfo
["MemTotal"] + meminfo
["SwapTotal"]
286 meminfo
["TotalUsed"] = meminfo
["MemUsed"] + meminfo
["SwapUsed"]
287 r
= check_mem_levels(
288 "Total virtual memory",
289 meminfo
["TotalUsed"],
290 meminfo
["TotalTotal"],
291 params
.get("levels_virtual"),
292 show_percentage
=True)
293 if r
[0] or meminfo
["SwapTotal"]:
294 yield r
# only display if there is swap or status is non-OK
296 # Total memory / in relation to RAM
297 r
= check_mem_levels(
299 meminfo
["TotalUsed"],
300 meminfo
["TotalTotal"],
301 params
.get("levels_total"),
303 of_value
=meminfo
["MemTotal"])
305 yield r
# only display if non-OK
308 if "Shmem" in meminfo
:
309 r
= check_mem_levels(
313 params
.get("levels_shm"),
316 yield r
# only display if non-OK
319 r
= check_mem_levels(
321 meminfo
["PageTables"],
323 params
.get("levels_pagetables"),
326 yield r
# only display if non-OK
329 meminfo
["Pending"] = \
331 + meminfo
.get("Writeback", 0) \
332 + meminfo
.get("NFS_Unstable", 0) \
333 + meminfo
.get("Bounce", 0) \
334 + meminfo
.get("WritebackTmp", 0)
336 r
= check_mem_levels(
340 params
.get("levels_writeback"),
343 yield r
# only display if non-OK
346 r
= check_mem_levels(
348 meminfo
["Committed_AS"],
349 meminfo
["TotalTotal"],
350 params
.get("levels_committed"),
351 of_what
="RAM + Swap")
353 yield r
# only display if non-OK
356 if "CommitLimit" in meminfo
:
357 r
= check_mem_levels(
359 meminfo
["TotalTotal"] - meminfo
["CommitLimit"],
360 meminfo
["TotalTotal"],
361 params
.get("levels_commitlimit"),
362 of_what
="RAM + Swap")
364 yield r
# only display if non-OK
367 if "MemAvailable" in meminfo
:
368 r
= check_mem_levels(
370 meminfo
["MemTotal"] - meminfo
["MemAvailable"],
372 params
.get("levels_available"),
376 yield r
# only display if non-OK
# Newer kernel versions report wrong data,
# i.e. both VmallocUsed and VmallocChunk are zero
381 if not (meminfo
["VmallocUsed"] == 0 and meminfo
["VmallocChunk"] == 0):
382 r
= check_mem_levels(
383 "Largest Free VMalloc Chunk",
384 meminfo
["VmallocTotal"] - meminfo
["VmallocChunk"],
385 meminfo
["VmallocTotal"],
386 params
.get("levels_vmalloc"),
387 of_what
="VMalloc Area",
390 yield r
# only display if non-OK
393 if "HardwareCorrupted" in meminfo
:
394 r
= check_mem_levels(
395 "Hardware Corrupted",
396 meminfo
["HardwareCorrupted"],
398 params
.get("levels_hardwarecorrupted"),
401 yield r
# only display if non-OK
403 # Now send performance data. We simply output *all* fields of meminfo
404 # except for a few really useless values
406 items
= meminfo
.items()
408 for name
, value
in items
:
409 if name
.startswith("DirectMap"):
412 "Vmalloc") and meminfo
["VmallocTotal"] > 2**40: # useless on 64 Bit system
414 if name
.startswith("Huge"):
415 if meminfo
["HugePages_Total"] == 0: # omit useless data
417 if name
== "Hugepagesize":
418 continue # not needed
419 value
= value
* meminfo
["Hugepagesize"] # convert number to actual memory size
420 perfdata
.append((camelcase_to_underscored(name
.replace("(", "_").replace(")", "")), value
))
421 yield 0, "", perfdata
424 # ThisIsACamel -> this_is_a_camel
425 def camelcase_to_underscored(name
):
426 previous_lower
= False
427 previous_underscore
= True
431 if previous_lower
and not previous_underscore
:
433 previous_lower
= False
434 previous_underscore
= False
437 previous_lower
= False
438 previous_underscore
= True
441 previous_lower
= True
442 previous_underscore
= False
# Registration of the "mem.linux" check. The default parameters come from
# factory_settings["mem_linux_default_levels"]; the helper functions from
# mem.include are loaded through 'includes'.
check_info["mem.linux"] = {
    'inventory_function': inventory_mem_linux,
    'check_function': check_mem_linux,
    'service_description': 'Memory',
    'default_levels_variable': 'mem_linux_default_levels',
    'has_perfdata': True,
    'group': 'memory_linux',
    "handle_real_time_checks": True,
    'includes': ['mem.include'],
}
459 # .--mem.used------------------------------------------------------------.
461 # | _ __ ___ ___ _ __ ___ _ _ ___ ___ __| | |
462 # | | '_ ` _ \ / _ \ '_ ` _ \ | | | / __|/ _ \/ _` | |
463 # | | | | | | | __/ | | | | || |_| \__ \ __/ (_| | |
464 # | |_| |_| |_|\___|_| |_| |_(_)__,_|___/\___|\__,_| |
466 # +----------------------------------------------------------------------+
467 # | Memory check that takes into account the swap space. This check is |
# | used for Unix-like operating systems. |
469 # '----------------------------------------------------------------------'
472 def parse_proc_meminfo(info
):
476 parsed
[line
[0][:-1]] = int(line
[1])
477 except (ValueError, IndexError) as _exc
:
# The following variable is obsolete. It is kept here only so that Check_MK
# does not fail if an existing main.mk still refers to it.
mem_extended_perfdata = None
487 def inventory_mem_used(info
):
488 meminfo
= parse_proc_meminfo(info
)
489 if "MemTotal" in meminfo \
490 and "PageTotal" not in meminfo \
491 and not is_linux_meminfo(meminfo
): # handled by more modern check
def check_mem_used(_no_item, params, info):
    """Check function of "mem.used".

    Parses the agent section with parse_proc_meminfo() and hands the result,
    together with the check parameters, to check_memory().

    Args:
        _no_item: Unused; the check has no item.
        params: Check parameters, passed through unchanged to check_memory().
        info: Raw agent section as handed over by Check_MK.

    Returns:
        Whatever check_memory() returns for the parsed section.
    """
    meminfo = parse_proc_meminfo(info)
    return check_memory(params, meminfo)
500 check_info
['mem.used'] = {
501 "check_function": check_mem_used
,
502 "inventory_function": inventory_mem_used
,
503 "service_description": "Memory used",
504 "has_perfdata": True,
506 "default_levels_variable": "memory_default_levels",
507 "includes": ["mem.include"],
508 "handle_real_time_checks": True,
512 # .--mem.win-------------------------------------------------------------.
514 # | _ __ ___ ___ _ __ ___ __ _(_)_ __ |
515 # | | '_ ` _ \ / _ \ '_ ` _ \\ \ /\ / / | '_ \ |
516 # | | | | | | | __/ | | | | |\ V V /| | | | | |
517 # | |_| |_| |_|\___|_| |_| |_(_)_/\_/ |_|_| |_| |
519 # +----------------------------------------------------------------------+
# | Windows now has a dedicated memory check that reflects the special |
521 # | nature of the page file. |
522 # '----------------------------------------------------------------------'
524 # Special memory and page file check for Windows
# Factory defaults for the "mem.win" check as (warn, crit) pairs, one for the
# RAM and one for the page file. Float levels are interpreted as percent;
# integer levels would instead be levels on *free* space.
factory_settings["memory_win_default_levels"] = {
    "memory": (80.0, 90.0),
    "pagefile": (80.0, 90.0),
}
531 def inventory_mem_win(info
):
532 meminfo
= parse_proc_meminfo(info
)
533 if "MemTotal" in meminfo
and "PageTotal" in meminfo
:
537 def check_mem_windows(_no_item
, params
, info
):
538 meminfo
= parse_proc_meminfo(info
)
541 def _get_levels_on_used_mb(param_key
, total_mb
):
542 levels
= params
.get(param_key
)
543 if not isinstance(levels
, tuple):
544 # Predictive levels have no level information in the performance data
547 if isinstance(levels
[0], float):
548 # float type means percent
549 warn
= total_mb
* levels
[0] / 100
551 # int means levels on *free* space
552 warn
= total_mb
- levels
[0]
553 if isinstance(levels
[1], float):
554 crit
= total_mb
* levels
[1] / 100
556 crit
= total_mb
- levels
[1]
559 for title
, prefix
, paramname
in [("Memory usage", "Mem", "memory"),
560 ("Commit charge", "Page", "pagefile")]:
562 total_kb
= meminfo
.get("%sTotal" % prefix
)
563 free_kb
= meminfo
.get("%sFree" % prefix
)
564 if None in (total_kb
, free_kb
):
567 total_mb
= total_kb
/ 1024.0
568 free_mb
= free_kb
/ 1024.0
569 used_kb
= total_kb
- free_kb
570 used_mb
= total_mb
- free_mb
571 perc
= 100.0 * used_kb
/ total_kb
573 warn
, crit
= _get_levels_on_used_mb(paramname
, total_mb
)
575 infotext
= "%s: %s (%s/%s)" % (title
, get_percent_human_readable(perc
),
576 get_bytes_human_readable(used_kb
* 1024),
577 get_bytes_human_readable(total_kb
* 1024))
579 perfdata
= [(paramname
, used_mb
, warn
, crit
, 0, total_mb
)]
581 perfdata
.append(("mem_total", total_mb
))
582 elif prefix
== "Page":
583 perfdata
.append(("pagefile_total", total_mb
))
585 # Do averaging, if configured, just for matching the levels
586 if "average" in params
:
587 average_min
= params
["average"]
588 used_kb
= get_average(
589 "mem.win.%s" % paramname
, now
, used_kb
, average_min
, initialize_zero
=False)
590 used_mb
= used_kb
/ 1024.0
591 perc
= 100.0 * used_kb
/ total_kb
592 infotext
+= ", %d min average: %s (%s)" % (average_min
,
593 get_percent_human_readable(perc
),
594 get_bytes_human_readable(used_kb
* 1024))
595 perfdata
.append((paramname
+ "_avg", used_mb
))
597 # Now check the levels
598 if (warn
, crit
) != (None, None):
602 elif used_mb
>= warn
:
607 if "average" in params
:
609 dsname
= "%s_avg" % paramname
614 state
, infoadd
, perfadd
= check_levels(
615 used_mb
, # Current value stored in MB in RRDs
618 unit
="GB", # Levels are specified in GB...
619 scale
=1024, # ... in WATO ValueSpec
622 infotext
+= ", " + infoadd
625 yield state
, infotext
, perfdata
# Registration of the "mem.win" check. The default parameters come from
# factory_settings["memory_win_default_levels"].
check_info["mem.win"] = {
    'check_function': check_mem_windows,
    'inventory_function': inventory_mem_win,
    'service_description': 'Memory and pagefile',
    'has_perfdata': True,
    'group': 'memory_pagefile_win',
    'default_levels_variable': 'memory_win_default_levels',
    "handle_real_time_checks": True,
}
639 # .--mem.vmalloc---------------------------------------------------------.
641 # | _ __ ___ ___ _ __ ___ __ ___ __ ___ __ _| | | ___ ___ |
642 # | | '_ ` _ \ / _ \ '_ ` _ \\ \ / / '_ ` _ \ / _` | | |/ _ \ / __| |
643 # | | | | | | | __/ | | | | |\ V /| | | | | | (_| | | | (_) | (__ |
644 # | |_| |_| |_|\___|_| |_| |_(_)_/ |_| |_| |_|\__,_|_|_|\___/ \___| |
646 # +----------------------------------------------------------------------+
647 # | This very specific check checks the usage and fragmentation of the |
648 # | address space 'vmalloc' that can be problematic on 32-Bit systems. |
# | It is superseded by the new check mem.linux and will be removed |
651 # '----------------------------------------------------------------------'
# Default levels of the "mem.vmalloc" check in the order
# (warn, crit, warn_chunk, crit_chunk).
# Integers are in MB, floats are in percent.
mem_vmalloc_default_levels = (80.0, 90.0, 64, 32)
657 def inventory_mem_vmalloc(info
):
658 meminfo
= parse_proc_meminfo(info
)
659 if is_linux_meminfo(meminfo
):
660 return # handled by new Linux memory check
# Newer kernel versions report wrong data,
# i.e. both VmallocUsed and VmallocChunk are zero
664 if "VmallocTotal" in meminfo
and \
665 not (meminfo
["VmallocUsed"] == 0 and meminfo
["VmallocChunk"] == 0):
# Do not check this on 64-bit systems. They have almost
668 vmalloc
= meminfo
["VmallocTotal"] / 1024.4
670 return [(None, "mem_vmalloc_default_levels")]
673 def check_mem_vmalloc(item
, params
, info
):
674 meminfo
= parse_proc_meminfo(info
)
675 total_mb
= meminfo
["VmallocTotal"] / 1024.0
676 used_mb
= meminfo
["VmallocUsed"] / 1024.0
677 chunk_mb
= meminfo
["VmallocChunk"] / 1024.0
678 warn
, crit
, warn_chunk
, crit_chunk
= params
683 for var
, w
, c
, v
, neg
, what
in [("used", warn
, crit
, used_mb
, False, "used"),
684 ("chunk", warn_chunk
, crit_chunk
, chunk_mb
, True,
687 # convert levels from percentage to MB values
688 if isinstance(w
, float):
689 w_mb
= total_mb
* w
/ 100
693 if isinstance(c
, float):
694 c_mb
= total_mb
* c
/ 100
698 infotxt
= "%s %.1f MB" % (what
, v
)
699 if (v
>= c_mb
) != neg
:
701 infotxt
+= " (critical at %.1f MB!!)" % c_mb
702 elif (v
>= w_mb
) != neg
:
704 infotxt
+= " (warning at %.1f MB!)" % w_mb
707 state
= max(state
, s
)
708 infotxts
.append(infotxt
)
709 perfdata
.append((var
, v
, w_mb
, c_mb
, 0, total_mb
))
710 return (state
, ("total %.1f MB, " % total_mb
) + ", ".join(infotxts
), perfdata
)
713 check_info
["mem.vmalloc"] = {
714 'inventory_function': inventory_mem_vmalloc
,
715 'check_function': check_mem_vmalloc
,
716 'service_description': 'Vmalloc address space',
717 'has_perfdata': True,
718 "handle_real_time_checks": True,