2 # -*- encoding: utf-8; py-indent-offset: 4 -*-
3 # +------------------------------------------------------------------+
4 # | ____ _ _ __ __ _ __ |
5 # | / ___| |__ ___ ___| | __ | \/ | |/ / |
6 # | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
7 # | | |___| | | | __/ (__| < | | | | . \ |
8 # | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
10 # | Copyright Mathias Kettner 2014 mk@mathias-kettner.de |
11 # +------------------------------------------------------------------+
13 # This file is part of Check_MK.
14 # The official homepage is at http://mathias-kettner.de/check_mk.
16 # check_mk is free software; you can redistribute it and/or modify it
17 # under the terms of the GNU General Public License as published by
18 # the Free Software Foundation in version 2. check_mk is distributed
19 # in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
20 # out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
21 # PARTICULAR PURPOSE. See the GNU General Public License for more de-
22 # tails. You should have received a copy of the GNU General Public
23 # License along with GNU Make; see the file COPYING. If not, write
24 # to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
25 # Boston, MA 02110-1301 USA.
27 # Author: Lars Michelsen <lm@mathias-kettner.de>
29 # Example outputs from agent:
33 # version: 8.3.8 (api:88/proto:86-94)
34 # GIT-hash: d78846e52224fd00562f7c225bcc25b2d422321d build by cssint@erzc20, 2010-06-17 14:47:26
35 # 0: cs:SyncSource ro:Primary/Secondary ds:UpToDate/Inconsistent C r----
36 # ns:12031428 nr:0 dw:12031364 dr:1175992347 al:2179 bm:71877 lo:37 pe:0 ua:37 ap:0 ep:1 wo:b oos:301729988
37 # [=======>............] sync'ed: 42.4% (294656/510908)M delay_probe: 145637
38 # finish: 1:23:28 speed: 60,172 (51,448) K/sec
42 # b01srv05:~ # cat /proc/drbd
43 # version: 8.3.8 (api:88/proto:86-94)
44 # GIT-hash: d78846e52224fd00562f7c225bcc25b2d422321d build by cssint@erzc20, 2010-06-17 14:47:26
45 # 0: cs:SyncSource ro:Primary/Secondary ds:UpToDate/Inconsistent C r----
46 # ns:11545876 nr:0 dw:11545900 dr:954551211 al:1955 bm:58360 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:523171100
47 # [>....................] sync'ed: 0.1% (510908/510908)M delay_probe: 135599
52 # version: 8.3.8 (api:88/proto:86-94)
53 # GIT-hash: d78846e52224fd00562f7c225bcc25b2d422321d build by cssint@erzc20, 2010-06-17 14:47:26
54 # 0: cs:Connected ro:Primary/Secondary ds:UpToDate/UpToDate C r----
55 # ns:12227928 nr:0 dw:12227864 dr:1477722351 al:2300 bm:90294 lo:0 pe:0 ua:0 ap:0 ep:1 wo:b oos:0
57 # Description of the /proc/drbd output:
58 # http://www.drbd.org/users-guide/ch-admin.html#s-proc-drbd
# The information from /proc/drbd is grouped as follows (extracted from the doc above)
63 # cs (connection state). Status of the network connection. See the section called
64 # “Connection states” for details about the various connection states.
66 # StandAlone. No network configuration available. The resource has not yet been connected,
67 # or has been administratively disconnected (using drbdadm disconnect),
68 # or has dropped its connection due to failed authentication or split brain.
69 # Disconnecting. Temporary state during disconnection. The next state is StandAlone.
70 # Unconnected. Temporary state, prior to a connection attempt.
71 # Possible next states: WFConnection and WFReportParams.
72 # Timeout. Temporary state following a timeout in the communication with the peer. Next state: Unconnected.
73 # BrokenPipe. Temporary state after the connection to the peer was lost. Next state: Unconnected.
74 # NetworkFailure. Temporary state after the connection to the partner was lost. Next state: Unconnected.
75 # ProtocolError. Temporary state after the connection to the partner was lost. Next state: Unconnected.
76 # TearDown. Temporary state. The peer is closing the connection. Next state: Unconnected.
77 # WFConnection. This node is waiting until the peer node becomes visible on the network.
78 # WFReportParams. TCP connection has been established, this node waits for the first network packet from the peer.
79 # Connected. A DRBD connection has been established, data mirroring is now active. This is the normal state.
80 # StartingSyncS. Full synchronization, initiated by the administrator, is just starting.
81 # The next possible states are: SyncSource or PausedSyncS.
82 # StartingSyncT. Full synchronization, initiated by the administrator, is just starting. Next state: WFSyncUUID.
83 # WFBitMapS. Partial synchronization is just starting. Next possible states: SyncSource or PausedSyncS.
84 # WFBitMapT. Partial synchronization is just starting. Next possible state: WFSyncUUID.
85 # WFSyncUUID. Synchronization is about to begin. Next possible states: SyncTarget or PausedSyncT.
86 # SyncSource. Synchronization is currently running, with the local node being the source of synchronization.
87 # SyncTarget. Synchronization is currently running, with the local node being the target of synchronization.
88 # PausedSyncS. The local node is the source of an ongoing synchronization, but synchronization is currently paused.
89 # This may be due to a dependency on the completion of another synchronization process,
90 # or due to synchronization having been manually interrupted by drbdadm pause-sync.
91 # PausedSyncT. The local node is the target of an ongoing synchronization, but synchronization
92 # is currently paused. This may be due to a dependency on the completion of another
93 # synchronization process, or due to synchronization having been manually interrupted by drbdadm pause-sync.
94 # VerifyS. On-line device verification is currently running, with the local node being the source of verification.
95 # VerifyT. On-line device verification is currently running, with the local node being the target of verification.
97 # ro (roles). Roles of the nodes. The role of the local node is displayed first, followed by the role of the partner
98 # node shown after the slash. See the section called “Resource roles” for details about the possible resource roles.
100 # Primary. The resource is currently in the primary role, and may be read from and written to.
# This role only occurs on one of the two nodes, unless dual-primary mode is enabled.
102 # Secondary. The resource is currently in the secondary role. It normally receives updates
103 # from its peer (unless running in disconnected mode), but may neither be read from
104 # nor written to. This role may occur on one node or both nodes.
105 # Unknown. The resource's role is currently unknown. The local resource role never has this status.
106 # It is only displayed for the peer's resource role, and only in disconnected mode.
108 # ds (disk states). State of the hard disks. Prior to the slash the state of the local node is displayed,
109 # after the slash the state of the hard disk of the partner node is shown.
110 # See the section called “Disk states” for details about the various disk states.
112 # Diskless. No local block device has been assigned to the DRBD driver. This may mean that the resource
113 # has never attached to its backing device, that it has been manually detached using drbdadm detach
114 # or that it automatically detached after a lower-level I/O error.
115 # Attaching. Transient state while reading meta data.
116 # Failed. Transient state following an I/O failure report by the local block device. Next state: Diskless.
117 # Negotiating. Transient state when an Attach is carried out on an already-connected DRBD device.
118 # Inconsistent. The data is inconsistent. This status occurs immediately upon creation of a new resource,
119 # on both nodes (before the initial full sync). Also, this status is found in one node
120 # (the synchronization target) during synchronization.
121 # Outdated. Resource data is consistent, but outdated.
122 # DUnknown. This state is used for the peer disk if no network connection is available.
123 # Consistent. Consistent data of a node without connection. When the connection
124 # is established, it is decided whether the data are UpToDate or Outdated.
125 # UpToDate. Consistent, up-to-date state of the data. This is the normal state.
128 # ns (network send). Volume of net data sent to the partner via the network connection; in Kibyte.
129 # nr (network receive). Volume of net data received by the partner via the network connection; in Kibyte.
131 # dw (disk write). Net data written on local hard disk; in Kibyte.
132 # dr (disk read). Net data read from local hard disk; in Kibyte.
134 # al (activity log). Number of updates of the activity log area of the meta data.
135 # bm (bit map). Number of updates of the bitmap area of the meta data.
136 # lo (local count). Number of open requests to the local I/O sub-system issued by DRBD.
137 # pe (pending). Number of requests sent to the partner, but that have not yet been answered by the latter.
138 # ua (unacknowledged). Number of requests received by the partner via the network connection, but that have not yet been answered.
139 # ap (application pending). Number of block I/O requests forwarded to DRBD, but not yet answered by DRBD.
140 # ep (epochs). Number of epoch objects. Usually 1. Might increase under I/O load
141 # when using either the barrier or the none write ordering method. Since 8.2.7.
142 # wo (write order). Currently used write ordering method: b (barrier), f (flush), d (drain) or n (none). Since 8.2.7.
143 # oos (out of sync). Amount of storage currently out of sync; in Kibibytes. Since 8.2.6.
145 # Default thresholds for drbd checks
146 drbd_net_default_levels
= (None, None)
147 drbd_disk_default_levels
= (None, None)
148 drbd_stats_default_levels
= (None, None, None, None, None, None, None, None, None)
150 _drbd_block_start_match
= re
.compile('^[0-9]+:')
152 drbd_general_map
= ['cs', 'ro', 'ds']
153 drbd_net_map
= ['cs', 'ns', 'nr']
154 drbd_disk_map
= ['cs', 'dw', 'dr']
155 drbd_stats_map
= ['cs', 'al', 'bm', 'lo', 'pe', 'ua', 'ap', 'ep', 'wo', 'oos']
185 "primary_Diskless": 2,
186 "secondary_Diskless": 2,
187 "primary_Attaching": 2,
188 "secondary_Attaching": 2,
190 "secondary_Failed": 2,
191 "primary_Negotiating": 2,
192 "secondary_Negotiating": 2,
193 "primary_Inconsistent": 1,
194 "secondary_Inconsistent": 1,
195 "primary_Outdated": 2,
196 "secondary_Outdated": 2,
197 "primary_DUnknown": 2,
198 "secondary_DUnknown": 2,
199 "primary_Consistent": 2,
200 "secondary_Consistent": 2,
201 "primary_UpToDate": 0,
202 "secondary_UpToDate": 0,
def inventory_drbd(info, checktype):
    """Discover the DRBD devices found in the agent output.

    info is the list of agent lines, each line being a list of tokens; the
    first two lines (version information) are skipped. checktype is one of
    'drbd', 'drbd.net', 'drbd.disk' or 'drbd.stats'.

    Returns a list of (item, parameters) tuples with item 'drbd<N>'. For
    'drbd' the parameters are a dictionary holding the roles and disk states
    seen during discovery; for the other check types they are the name of the
    matching default levels variable.
    """
    levels_variables = {
        'drbd.net': "drbd_net_default_levels",
        'drbd.disk': "drbd_disk_default_levels",
        'drbd.stats': "drbd_stats_default_levels",
    }

    inventory = []
    for line in info[2:]:
        # Only a first token of the form "<N>:" starts a device block. The
        # match result is tested for truth: ordering a match object (or None)
        # against an integer is not a valid comparison on Python 3.
        if not line or not _drbd_block_start_match.search(line[0]):
            continue

        item = 'drbd%s' % line[0][:-1]
        parsed = drbd_parse_block(drbd_extract_block(item, info), checktype)

        # Skip unconfigured drbd devices
        if parsed['cs'] == 'Unconfigured':
            continue

        if checktype == 'drbd':
            # Roles and disk states are both needed as reference values
            if 'ro' not in parsed or 'ds' not in parsed:
                continue
            levels = {
                "roles_inventory": parsed['ro'],
                "diskstates_inventory": parsed['ds'],
            }
        elif checktype in levels_variables:
            levels = levels_variables[checktype]
        else:
            # Unknown check type: there are no parameters to attach, and
            # 'levels' would be undefined (or left over from another device)
            continue

        inventory.append((item, levels))

    return inventory
def drbd_parse_block(block, to_parse):
    """Turn the lines of one device block into a dictionary.

    block is a list of lines, each line a list of tokens. Every token is
    split at ':'; tokens without a ':' are ignored. Only the keys listed in
    the field map of the check type to_parse are stored. For check type
    'drbd' the values of 'ro' and 'ds' are additionally split at '/' into a
    list; all other values are kept as strings.
    """
    keys_by_check = {
        'drbd': drbd_general_map,
        'drbd.net': drbd_net_map,
        'drbd.disk': drbd_disk_map,
        'drbd.stats': drbd_stats_map,
    }
    # An unknown check type selects nothing, so the result stays empty
    wanted_keys = keys_by_check.get(to_parse, ())
    split_pairs = to_parse == 'drbd'

    parsed = {}
    for line in block:
        for field in line:
            pieces = field.split(':')
            if len(pieces) < 2:
                continue

            key, value = pieces[0], pieces[1]
            if key not in wanted_keys:
                continue

            if split_pairs and key in ('ro', 'ds'):
                # "local/peer" pair
                parsed[key] = value.split('/')
            else:
                parsed[key] = value

    return parsed
def drbd_extract_block(item, info):
    """Return the agent lines that belong to the device named item.

    info is the list of agent lines, each line being a list of tokens. The
    block begins at the line whose first token, minus its last character and
    prefixed with "drbd", equals item, and ends right before the next line
    that starts another device block. An empty list is returned when the
    device does not show up in info.
    """
    block = []
    in_block = False
    # Ignore the first two lines since they contain drbd version information
    for line in info[2:]:
        if not line:
            # A line without tokens neither starts nor extends a block
            continue

        first_token = line[0]
        if "drbd" + first_token[:-1] == item:
            in_block = True
        elif in_block and _drbd_block_start_match.search(first_token):
            # Another block starts. So the requested block is finished.
            # The match result is tested for truth; comparing it with an
            # integer is not a valid comparison on Python 3.
            break

        # Lines in front of the wanted block are skipped
        if in_block:
            block.append(line)

    return block
def drbd_get_block(item, info, checktype):
    """Extract and parse the block of device item for the given check type.

    Returns the dictionary built by drbd_parse_block, or None when
    drbd_extract_block finds no lines for the device.
    """
    lines = drbd_extract_block(item, info)
    if not lines:
        return None
    return drbd_parse_block(lines, checktype)
def check_drbd_general(item, params, info):
    """Check connection state, roles and disk states of one DRBD device.

    params is either a dictionary (keys read here: "roles",
    "roles_inventory", "diskstates", "diskstates_inventory") or a legacy
    tuple (roles, diskstates) which is converted to the dictionary form.

    Returns a (state, text) tuple. The state starts with the weight of the
    connection state from drbd_cs_map and is raised by the role and disk
    state evaluation. (3, "Undefined state") is returned when the device is
    not found in info.
    """
    parsed = drbd_get_block(item, info, 'drbd')

    if isinstance(params, tuple):
        # Legacy format. Empty values become None, and the disk states are
        # only taken over when roles are given as well.
        legacy_roles, legacy_diskstates = params[0], params[1]
        params = {
            "roles_inventory": legacy_roles or None,
            "diskstates_inventory": (legacy_diskstates or None) if legacy_roles else None,
        }

    if parsed is None:
        return (3, "Undefined state")

    if parsed['cs'] == 'Unconfigured':
        return (2, 'The device is "Unconfigured"')
    if parsed['cs'] not in drbd_cs_map:
        return (3, 'Undefined "connection state" in drbd output')

    # Weight of connection state is calculated by the drbd_cs_map.
    # The roles and disk states are calculated using the expected values
    state = drbd_cs_map[parsed['cs']]
    output = 'Connection State: %s' % parsed['cs']

    # Roles
    output += ', Roles: %s/%s' % tuple(parsed['ro'])
    current_roles = "_".join(str(role).lower() for role in parsed["ro"])

    found_role_match = False
    if "roles" in params:
        roles = params.get("roles")
        if roles:
            # The first configured entry matching the current roles wins
            for roles_entry, roles_state in roles:
                if roles_entry == current_roles:
                    found_role_match = True
                    state = max(state, roles_state)
                    output += ' %s' % state_markers[roles_state]
                    break
        else:  # Ignore roles if set to None
            found_role_match = True

    if not found_role_match:
        if "roles_inventory" in params:
            # Fall back to the roles remembered during discovery
            roles_inventory = params.get("roles_inventory")
            if roles_inventory and parsed["ro"] != roles_inventory:
                state = max(2, state)
                output += ' (Expected: %s/%s)' % tuple(roles_inventory)
        else:
            state = max(3, state)
            output += ' (Check requires a new service discovery)'

    output += ', Diskstates: %s/%s' % tuple(parsed['ds'])
    # Do not evaluate diskstates. Either set by rule or through the
    # legacy configuration option None in the check parameters tuple
    if ("diskstates" in params and params["diskstates"] is None) or \
       ("diskstates_inventory" in params and params["diskstates_inventory"] is None):
        return (state, output)

    params_diskstates_dict = dict(params.get("diskstates", []))
    # A list keeps the remarks in local/peer order; duplicates are dropped
    # by hand so that the text does not depend on set iteration order.
    diskstates_info = []
    for ro, ds in zip(parsed["ro"], parsed["ds"]):
        diskstate = "%s_%s" % (ro.lower(), ds)
        configured_state = params_diskstates_dict.get(diskstate)

        if configured_state is not None:
            # A state configured by rule is always mentioned
            diskstate_state = configured_state
            mention = True
        else:
            # Built-in defaults are only mentioned when they are not OK;
            # unknown combinations count as UNKNOWN (3)
            diskstate_state = drbd_ds_map.get(diskstate, 3)
            mention = diskstate_state > 0

        state = max(state, diskstate_state)
        if mention:
            remark = '%s/%s is %s' % (ro, ds, state_markers[diskstate_state])
            if remark not in diskstates_info:
                diskstates_info.append(remark)

    if diskstates_info:
        output += " (%s)" % ", ".join(diskstates_info)

    return (state, output)
361 check_info
["drbd"] = {
362 'inventory_function': lambda info
: inventory_drbd(info
, "drbd"),
363 'check_function': check_drbd_general
,
365 'has_perfdata': True,
366 'service_description': 'DRBD %s status',
def drbd_get_rates(list_):
    """Convert counter values into rates.

    list_ holds (type, name, item, value, unit) tuples. For each of them a
    rate is requested from get_rate under the counter name
    "<type>.<name>.<item>".

    Returns (text, perfdata): text is the concatenation of
    " <name>/sec: <rate><unit>" for every entry, perfdata the list of
    (name, rate) tuples.
    """
    # NOTE(review): the assignment of 'now' is not visible in the reviewed
    # excerpt; the current time is assumed here - confirm.
    now = time.time()

    perfdata = []
    fragments = []
    for type_, name, item, value, uom in list_:
        counter_name = "%s.%s.%s" % (type_, name, item)
        rate = get_rate(counter_name, now, value)
        perfdata.append((name, rate))
        fragments.append(' %s/sec: %s%s' % (name, rate, uom))

    return ("".join(fragments), perfdata)
def check_drbd_net(item, params, info):
    """Report the network receive/send rates of one DRBD device.

    Returns (0, text, perfdata) built by drbd_get_rates from the 'nr' (in)
    and 'ns' (out) counters, (2, ...) for an unconfigured device and
    (3, "Undefined state") when the device is not found in info. params is
    not evaluated.
    """
    parsed = drbd_get_block(item, info, 'drbd.net')
    if parsed is None:
        return (3, "Undefined state")

    if parsed['cs'] == 'Unconfigured':
        return (2, 'The device is "Unconfigured"')

    counters = [
        ('drbd.net', 'in', item, int(parsed['nr']), 'kb'),
        ('drbd.net', 'out', item, int(parsed['ns']), 'kb'),
    ]
    output, perfdata = drbd_get_rates(counters)
    # FIXME: Maybe handle thresholds in the future
    return (0, output, perfdata)
394 check_info
["drbd.net"] = {
395 'inventory_function': lambda info
: inventory_drbd(info
, "drbd.net"),
396 'check_function': check_drbd_net
,
398 'has_perfdata': True,
399 'service_description': 'DRBD %s net',
def check_drbd_disk(item, params, info):
    """Report the disk write/read rates of one DRBD device.

    Returns (0, text, perfdata) built by drbd_get_rates from the 'dw'
    (write) and 'dr' (read) counters, (2, ...) for an unconfigured device
    and (3, "Undefined state") when the device is not found in info. params
    is not evaluated.
    """
    parsed = drbd_get_block(item, info, 'drbd.disk')
    if parsed is None:
        return (3, "Undefined state")

    if parsed['cs'] == 'Unconfigured':
        return (2, 'The device is "Unconfigured"')

    counters = [
        ('drbd.disk', 'write', item, int(parsed['dw']), 'kb'),
        ('drbd.disk', 'read', item, int(parsed['dr']), 'kb'),
    ]
    output, perfdata = drbd_get_rates(counters)
    # FIXME: Maybe handle thresholds in the future
    return (0, output, perfdata)
416 check_info
["drbd.disk"] = {
417 'inventory_function': lambda info
: inventory_drbd(info
, "drbd.disk"),
418 'check_function': check_drbd_disk
,
419 'group': 'drbd.disk',
420 'has_perfdata': True,
421 'service_description': 'DRBD %s disk',
def check_drbd_stats(item, params, info):
    """Report the statistic fields of one DRBD device.

    Returns (0, text, perfdata): text lists "<label>: <value>" for every
    statistic present in the device block, perfdata holds
    (label with underscores, integer value) for every statistic whose value
    consists of digits only. Returns (2, ...) for an unconfigured device and
    (3, "Undefined state") when the device is not found in info. params is
    not evaluated.
    """
    parsed = drbd_get_block(item, info, 'drbd.stats')
    if parsed is None:
        return (3, "Undefined state")

    if parsed['cs'] == 'Unconfigured':
        return (2, 'The device is "Unconfigured"')

    labelled_keys = (
        ('al', 'activity log updates'),
        ('bm', 'bit map updates'),
        ('lo', 'local count requests'),
        ('pe', 'pending requests'),
        ('ua', 'unacknowledged requests'),
        ('ap', 'application pending requests'),
        ('ep', 'epoch objects'),
        ('wo', 'write order'),
        ('oos', 'kb out of sync'),
    )

    texts = []
    perfdata = []
    for key, label in labelled_keys:
        if key in parsed:
            value = parsed[key]
            texts.append('%s: %s' % (label, value))
        else:
            # perfdata must always have same number of entries, so a
            # statistic missing from the block is counted as 0
            value = '0'

        # Values that are not purely numeric yield no perfdata entry
        if value.isdigit():
            perfdata.append((label.replace(" ", "_"), int(value)))

    return (0, ', '.join(texts).rstrip(', '), perfdata)
454 check_info
["drbd.stats"] = {
455 'inventory_function': lambda info
: inventory_drbd(info
, "drbd.stats"),
456 'check_function': check_drbd_stats
,
457 'group': 'drbd.stats',
458 'has_perfdata': True,
459 'service_description': 'DRBD %s stats',