# -*- encoding: utf-8; py-indent-offset: 4 -*-
# +------------------------------------------------------------------+
# |             ____ _               _        __  __ _  __           |
# |            / ___| |__   ___  ___| | __   |  \/  | |/ /           |
# |           | |   | '_ \ / _ \/ __| |/ /   | |\/| | ' /            |
# |           | |___| | | |  __/ (__|   <    | |  | | . \            |
# |            \____|_| |_|\___|\___|_|\_\___|_|  |_|_|\_\           |
# |                                                                  |
# | Copyright Mathias Kettner 2014             mk@mathias-kettner.de |
# +------------------------------------------------------------------+
#
# This file is part of Check_MK.
# The official homepage is at http://mathias-kettner.de/check_mk.
#
# check_mk is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation in version 2. check_mk is distributed
# in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
# out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the GNU General Public License for more de-
# tails. You should have received a copy of the GNU General Public
# License along with GNU Make; see the file COPYING. If not, write
# to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
# Boston, MA 02110-1301 USA.
# If the size_trend function is not provided via size_trend.include raise
# an exception to provide an error message to the user. If dependencies
# between include files are possible this will not be necessary anymore.
if 'size_trend' not in globals():

    def size_trend(*_args, **_kwargs):  # pylint: disable=function-redefined
        """Fallback stub that fails loudly when size_trend.include is missing."""
        raise MKGeneralException('Function size_trend not found. Please include '
                                 '"size_trend.include" in your check')
# Common include file for all filesystem checks (df, df_netapp, hr_fs, ...)

# Settings for filesystem checks (df, df_vms, df_netapp and maybe others)
filesystem_levels = []  # obsolete. Just here to check config and warn if changed
filesystem_default_levels = {}  # can also be dropped some day in future

# Filesystems to ignore (shouldn't be sent by agent anyway)
inventory_df_exclude_mountpoints = ['/dev']

# Grouping of filesystems into groups that are monitored as one entity
# Example:
# filesystem_groups = [
#     ( [ ( "Storage pool", "/data/pool*" ) ], [ 'linux', 'prod' ], ALL_HOSTS ),
#     ( [ ( "Backup space 1", "/usr/backup/*.xyz" ),
#         ( "Backup space 2", "/usr/backup2/*.xyz" ) ], ALL_HOSTS ),
# ]
filesystem_groups = []

# Alternative syntax for parameters:
# { "levels"         : (80, 90),   # levels in percent
#   "magic"          : 0.5,        # magic factor
#   "magic_normsize" : 20,         # normsize in GB
#   "levels_low"     : (50, 60),   # magic never lowers levels below this (percent)
#   "trend_range"    : 24,         # hours
#   "trend_mb"       : (10, 20),   # MB of change during trend_range
#   "trend_perc"     : (1, 2),     # Percent change during trend_range
#   "trend_timeleft" : (72, 48),   # run time left in hours until full
# }

factory_settings["filesystem_default_levels"] = {
    "levels": (80.0, 90.0),  # warn/crit in percent
    "magic_normsize": 20,  # Standard size if 20 GB
    "levels_low": (50.0, 60.0),  # Never move warn level below 50% due to magic factor
    "trend_range": 24,  # hours -- NOTE(review): restored dropped entry from upstream df.include; verify against VCS
    "trend_perfdata": True,  # do send performance data for trends
    "show_levels": "onmagic",
    "inodes_levels": (10.0, 5.0),  # warn/crit at this percentage of free inodes
    "show_inodes": "onlow",
    "show_reserved": False,
}
def mountpoints_in_group(mplist, patterns):
    """Return the set of mountpoints from mplist that match at least one
    of the given shell-style (fnmatch) patterns."""
    return set(mp for mp in mplist
               if any(fnmatch.fnmatch(mp, pattern) for pattern in patterns))
def ungrouped_mountpoints_and_groups(mplist, group_patterns):
    """Split mplist into mountpoints not covered by any group and the groups.

    group_patterns: dict of group name -> list of fnmatch patterns.
    Returns (ungrouped_mountpoints, groups) where groups maps each group
    name to the set of matching mountpoints.
    """
    ungrouped_mountpoints = set(mplist)
    groups = {}  # fix: was referenced before assignment in the garbled text
    for group_name, patterns in group_patterns.items():
        groups[group_name] = mountpoints_in_group(mplist, patterns)
        ungrouped_mountpoints = ungrouped_mountpoints.difference(groups[group_name])
    return ungrouped_mountpoints, groups
def df_inventory(mplist):
    """Inventory function: one service per ungrouped mountpoint plus one
    service per configured filesystem group (with its patterns as params)."""
    group_patterns = {}  # fix: was referenced before assignment in the garbled text
    for line in host_extra_conf(host_name(), filesystem_groups):
        for group_name, pattern in line:
            group_patterns.setdefault(group_name, []).append(pattern)

    ungrouped_mountpoints, groups = ungrouped_mountpoints_and_groups(mplist, group_patterns)

    return [(mp, {}) for mp in ungrouped_mountpoints] \
           + [(group, {"patterns": group_patterns[group]}) for group in groups]
# Users might have set filesystem_default_levels to old format like (80, 90)

# needed by df, df_netapp and vms_df and maybe others in future:
# compute warning and critical levels. Takes into account the size of
# the filesystem and the magic number. Since the size is only known at
# check time this function's result cannot be precompiled.
# NOTE(review): several control-flow lines (else branches, loop initializers,
# the trailing return) were dropped in the mangled source and have been
# restored from upstream df.include -- verify against VCS.
def get_filesystem_levels(mountpoint, size_gb, params):
    """Compute the effective levels dict for one filesystem.

    size_gb: total size of the filesystem in GB.
    params:  check parameters, either a legacy tuple (warn, crit[, magic])
             or a dictionary.
    Returns a levels dict with "levels_mb", "levels_text" and
    "inodes_levels" filled in.
    """
    mega = 1024 * 1024
    giga = mega * 1024
    # Start with factory settings
    levels = factory_settings["filesystem_default_levels"].copy()

    def convert_legacy_levels(value):
        # Plain percent levels become a float tuple; "flex_levels"
        # configurations keep the list-of-rules form unchanged.
        if isinstance(params, tuple) or not params.get("flex_levels"):
            return tuple(map(float, value))
        return value

    # convert default levels to dictionary. This is in order support
    # old style levels like (80, 90)
    if isinstance(filesystem_default_levels, dict):
        fs_default_levels = filesystem_default_levels.copy()
        fs_levels = fs_default_levels.get("levels")
        if fs_levels:
            fs_default_levels["levels"] = convert_legacy_levels(fs_levels)
        levels.update(filesystem_default_levels)
    else:
        # filesystem_default_levels is a legacy tuple (warn, crit[, magic])
        levels = factory_settings["filesystem_default_levels"].copy()
        levels["levels"] = convert_legacy_levels(filesystem_default_levels[:2])
        if len(filesystem_default_levels) == 2:
            levels["magic"] = None
        else:
            levels["magic"] = filesystem_default_levels[2]

    # If params is a dictionary, make that override the default values
    if isinstance(params, dict):
        levels.update(params)
    else:  # simple format - explicitely override levels and magic
        levels["levels"] = convert_legacy_levels(params[:2])
        if len(params) >= 3:
            levels["magic"] = params[2]

    # Determine real warn, crit levels
    if isinstance(levels["levels"], tuple):
        warn, crit = levels["levels"]
    else:
        # A list of levels. Choose the correct one depending on the
        # size of the current filesystem. We do not make the first
        # rule match, but that with the largest size_gb. That way
        # the order of the entries is not important.
        found = False
        found_size = 0
        for to_size, this_levels in levels["levels"]:
            if size_gb * giga > to_size and to_size >= found_size:
                warn, crit = this_levels
                found_size = to_size
                found = True
        if not found:
            warn, crit = 100.0, 100.0  # entry not found in list

    # Take into account magic scaling factor (third optional argument
    # in check params). A factor of 1.0 changes nothing. Factor should
    # be > 0 and <= 1. A smaller factor raises levels for big file systems
    # bigger than 100 GB and lowers it for file systems smaller than 100 GB.
    # Please run df_magic_factor.py to understand how it works.
    magic = levels.get("magic")
    # We need a way to disable the magic factor so check
    # configuration can set it to None
    if magic and magic != 1.0:
        # convert warn/crit to percentage
        if not isinstance(warn, float):
            warn = savefloat(warn * mega / float(size_gb * giga)) * 100
        if not isinstance(crit, float):
            crit = savefloat(crit * mega / float(size_gb * giga)) * 100

        normsize = levels["magic_normsize"]
        hgb_size = size_gb / float(normsize)
        felt_size = hgb_size**magic
        scale = felt_size / hgb_size
        warn_scaled = 100 - ((100 - warn) * scale)
        crit_scaled = 100 - ((100 - crit) * scale)

        # Make sure, levels do never get too low due to magic factor
        lowest_warning_level, lowest_critical_level = levels["levels_low"]
        if warn_scaled < lowest_warning_level:
            warn_scaled = lowest_warning_level
        if crit_scaled < lowest_critical_level:
            crit_scaled = lowest_critical_level
    else:
        # No magic factor: only absolute (MB) levels need conversion to percent
        if not isinstance(warn, float):
            warn_scaled = savefloat(warn * mega / float(size_gb * giga)) * 100
        else:
            warn_scaled = warn
        if not isinstance(crit, float):
            crit_scaled = savefloat(crit * mega / float(size_gb * giga)) * 100
        else:
            crit_scaled = crit

    size_mb = size_gb * 1024
    warn_mb = savefloat(size_mb * warn_scaled / 100)
    crit_mb = savefloat(size_mb * crit_scaled / 100)
    levels["levels_mb"] = (warn_mb, crit_mb)
    if isinstance(warn, float):
        if warn_scaled < 0 and crit_scaled < 0:
            # Negative percent levels mean "free space below"
            label = 'warn/crit at free space below'
            warn_scaled *= -1
            crit_scaled *= -1
        else:
            label = 'warn/crit at'
        levels["levels_text"] = "(%s %s/%s)" % (label, get_percent_human_readable(warn_scaled),
                                                get_percent_human_readable(crit_scaled))
    else:
        if warn * mega < 0 and crit * mega < 0:
            label = 'warn/crit at free space below'
            warn *= -1
            crit *= -1
        else:
            label = 'warn/crit at'
        warn_hr = get_bytes_human_readable(warn * mega)
        crit_hr = get_bytes_human_readable(crit * mega)
        levels["levels_text"] = "(%s %s/%s)" % (label, warn_hr, crit_hr)

    if "inodes_levels" in params:
        if isinstance(levels["inodes_levels"], tuple):
            warn, crit = levels["inodes_levels"]
        else:
            # A list of inode levels. Choose the correct one depending on the
            # size of the current filesystem. We do not make the first
            # rule match, but that with the largest size_gb. That way
            # the order of the entries is not important.
            found = False
            found_size = 0
            for to_size, this_levels in levels["inodes_levels"]:
                if size_gb * giga > to_size and to_size >= found_size:
                    warn, crit = this_levels
                    found_size = to_size
                    found = True
            if not found:
                warn, crit = 100.0, 100.0  # entry not found in list
        levels["inodes_levels"] = warn, crit
    else:
        levels["inodes_levels"] = (None, None)

    return levels
# New function for checks that support groups.
def df_check_filesystem_list(item, params, fslist_blocks, fslist_inodes=None):
    """Check a single mountpoint or a filesystem group.

    item:          mountpoint name, or group name when params has "patterns"
    fslist_blocks: iterable of (mountpoint, size_mb, avail_mb, reserved_mb)
    fslist_inodes: optional iterable of (mountpoint, inodes_total, inodes_avail)
    Returns a (status, infotext, perfdata) tuple.
    """
    blocks_info = {
        mp: {
            "size_mb": size_mb,
            "avail_mb": avail_mb,
            "reserved_mb": reserved_mb,
        } for (mp, size_mb, avail_mb, reserved_mb) in fslist_blocks
    }

    inodes_info = {
        mp: {
            "inodes_total": inodes_total,
            "inodes_avail": inodes_avail,
        } for (mp, inodes_total, inodes_avail) in (fslist_inodes or [])
    }

    mplist = blocks_info.keys()

    if "patterns" in params:
        # Group check: aggregate all mountpoints matching the patterns
        # and check the sum as if it were one filesystem.
        patterns = params["patterns"]
        matching_mountpoints = mountpoints_in_group(mplist, patterns)
        count = len(matching_mountpoints)
        if count == 0:
            return 3, "No filesystem matching the patterns"

        total_size_mb = sum(block_info["size_mb"]
                            for (mp, block_info) in blocks_info.items()
                            if mp in matching_mountpoints)
        total_avail_mb = sum(block_info["avail_mb"]
                             for (mp, block_info) in blocks_info.items()
                             if mp in matching_mountpoints)
        total_reserved_mb = sum(block_info["reserved_mb"]
                                for (mp, block_info) in blocks_info.items()
                                if mp in matching_mountpoints)

        total_inodes = sum(inode_info["inodes_total"]
                           for (mp, inode_info) in inodes_info.items()
                           if mp in matching_mountpoints)
        total_inodes_avail = sum(inode_info["inodes_avail"]
                                 for (mp, inode_info) in inodes_info.items()
                                 if mp in matching_mountpoints)

        status, infotext, perfdata = df_check_filesystem_single(item, total_size_mb, total_avail_mb,
                                                                total_reserved_mb, total_inodes,
                                                                total_inodes_avail, params)
        infotext += " (%d filesystems)" % count
        return status, infotext, perfdata

    if item in blocks_info:
        mp = item  # restored: single-mountpoint case checks the item itself
        return df_check_filesystem_single(
            mp, blocks_info[mp]["size_mb"], blocks_info[mp]["avail_mb"],
            blocks_info[mp]["reserved_mb"],
            inodes_info.get(mp, {}).get("inodes_total", None),
            inodes_info.get(mp, {}).get("inodes_avail", None), params)
    return 3, "filesystem not found"
# NOTE(review): the parameter list and several control-flow lines were dropped
# in the mangled source; the signature below is grounded in the visible call
# sites (7 positional args from df_check_filesystem_list, this_time used in
# the body) and restored from upstream df.include -- verify against VCS.
def df_check_filesystem_single(mountpoint,
                               size_mb,
                               avail_mb,
                               reserved_mb,
                               inodes_total,
                               inodes_avail,
                               params,
                               this_time=None):
    """Check one (possibly aggregated) filesystem.

    All sizes are in MB; inodes_total/inodes_avail may be None when the
    agent provides no inode data. params is a dict or a legacy tuple.
    Returns a (status, infotext, perfdata) tuple.
    """
    if size_mb == 0:
        return (1, "Size of filesystem is 0 MB", [])

    try:
        show_levels = params.get("show_levels")
        subtract_reserved = bool(params.get("subtract_reserved")) and reserved_mb > 0
        show_reserved = params.get("show_reserved") and reserved_mb > 0
    except AttributeError:
        # params is a legacy tuple, not a dict
        show_levels = False
        subtract_reserved = False
        show_reserved = False

    used_mb = size_mb - avail_mb
    used_max = size_mb
    if subtract_reserved:
        used_mb -= reserved_mb
        used_max -= reserved_mb

    used_perc = 100.0 * (float(used_mb) / used_max)

    # Get warning and critical levels already with 'magic factor' applied
    levels = get_filesystem_levels(mountpoint, size_mb / 1024., params)
    warn_mb, crit_mb = levels["levels_mb"]
    warn_inode, crit_inode = levels["inodes_levels"]

    used_hr = get_bytes_human_readable(used_mb * 1024 * 1024)
    used_max_hr = get_bytes_human_readable(used_max * 1024 * 1024)
    used_perc_hr = get_percent_human_readable(used_perc)
    # If both numbers end with the same unit, then drop the first one
    if used_hr[-2:] == used_max_hr[-2:]:
        used_hr = used_hr[:-3]

    infotext = "%s used (%s of %s)" % (used_perc_hr, used_hr, used_max_hr)

    if warn_mb < 0.0:
        # Negative levels, so user configured thresholds based on space left. Calculate the
        # upper thresholds based on the size of the filesystem
        crit_mb = used_max + crit_mb
        warn_mb = used_max + warn_mb

    status = 0
    if used_mb >= crit_mb:
        status = 2
    elif used_mb >= warn_mb:
        status = 1

    # TODO: In some future version use a fixed name as perf variable
    perf_var = mountpoint.replace(" ", "_")
    perfdata = [(perf_var, used_mb, warn_mb, crit_mb, 0, size_mb), ('fs_size', size_mb)]

    if show_levels == "always" or \
       (show_levels == "onproblem" and status > 0) or \
       (show_levels == "onmagic" and (status > 0 or levels.get("magic", 1.0) != 1.0)):
        infotext += ", " + levels["levels_text"]

    if show_reserved:
        reserved_perc = 100.0 * float(reserved_mb) / size_mb
        reserved_perc_hr = get_percent_human_readable(reserved_perc)
        reserved_hr = get_bytes_human_readable(reserved_mb * 1024 * 1024)
        if subtract_reserved:
            infotext += ", additionally reserved for root: %s" % reserved_hr
        else:
            infotext += ", therein reserved for root: %s (%s)" \
                        % (reserved_perc_hr, reserved_hr)

    if subtract_reserved:
        perfdata.append(("fs_free", avail_mb, None, None, 0, size_mb))

    if subtract_reserved or show_reserved:
        perfdata.append(("reserved", reserved_mb))

    if levels.get("trend_range"):
        trend_status, trend_infotext, trend_perfdata = size_trend('df', mountpoint, "disk", levels,
                                                                  used_mb, size_mb, this_time)
        status = max(status, trend_status)
        infotext += trend_infotext
        perfdata.extend(trend_perfdata)

    if inodes_total:
        inode_status, problems = 0, []
        inodes_avail_perc = 100.0 * inodes_avail / inodes_total
        inodes_warn, inodes_crit = levels["inodes_levels"]
        if inodes_warn is not None:
            # Levels in absolute numbers
            if isinstance(inodes_warn, int):
                if inodes_crit > inodes_avail:
                    inode_status = 2
                    problems.append("less than %dk inodes available(!!)" % (crit_inode / 1000))
                elif inodes_warn > inodes_avail:
                    inode_status = 1
                    problems.append("less than %dk inodes available(!)" % (warn_inode / 1000))
                inodes_warn_abs = inodes_warn
                inodes_crit_abs = inodes_crit
            # Levels in percent of free inodes
            else:
                if inodes_crit > inodes_avail_perc:
                    inode_status = 2
                    problems.append("less than %s inodes available(!!)" %
                                    get_percent_human_readable(inodes_crit))
                elif inodes_warn > inodes_avail_perc:
                    inode_status = 1
                    problems.append("less than %s inodes available(!)" %
                                    get_percent_human_readable(inodes_warn))
                inodes_warn_abs = (100 - inodes_warn) / 100.0 * inodes_total
                inodes_crit_abs = (100 - inodes_crit) / 100.0 * inodes_total
        else:
            inodes_warn_abs = None
            inodes_crit_abs = None

        # Only show inodes if they are at less than 50%
        status = max(status, inode_status)
        show_inodes = levels["show_inodes"]
        if show_inodes == "always" or \
           (show_inodes == "onlow" and (inode_status or inodes_avail_perc < 50)) or \
           (show_inodes == "onproblem" and inode_status):
            infotext += ", inodes available: %dk/%s" % (
                inodes_avail / 1000, get_percent_human_readable(inodes_avail_perc))

        perfdata += [("inodes_used", inodes_total - inodes_avail, inodes_warn_abs, inodes_crit_abs,
                      0, inodes_total)]

        if problems:
            infotext += " - %s" % ", ".join(problems)

    return status, infotext, perfdata