Cleanup config.nodes_of
[check_mk.git] / checks / multipath
blob15cf3feb218b68194a9ed2715cfc65e1ca48df0d
1 #!/usr/bin/python
2 # -*- encoding: utf-8; py-indent-offset: 4 -*-
3 # +------------------------------------------------------------------+
4 # | ____ _ _ __ __ _ __ |
5 # | / ___| |__ ___ ___| | __ | \/ | |/ / |
6 # | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
7 # | | |___| | | | __/ (__| < | | | | . \ |
8 # | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
9 # | |
10 # | Copyright Mathias Kettner 2014 mk@mathias-kettner.de |
11 # +------------------------------------------------------------------+
13 # This file is part of Check_MK.
14 # The official homepage is at http://mathias-kettner.de/check_mk.
16 # check_mk is free software; you can redistribute it and/or modify it
17 # under the terms of the GNU General Public License as published by
18 # the Free Software Foundation in version 2. check_mk is distributed
19 # in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
20 # out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
21 # PARTICULAR PURPOSE. See the GNU General Public License for more de-
22 # tails. You should have received a copy of the GNU General Public
23 # License along with GNU Make; see the file COPYING. If not, write
24 # to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
25 # Boston, MA 02110-1301 USA.
27 # Configuration for using alias instead of UUID
28 inventory_multipath_rules = []
30 # Output from multipath -l has the following possible formats:
32 # orabase.lun50 (360a9800043346937686f456f59386741) dm-15 NETAPP,LUN
33 # [size=25G][features=1 queue_if_no_path][hwhandler=0]
34 # \_ round-robin 0 [prio=0][active]
35 # \_ 1:0:3:50 sdy 65:128 [active][undef]
36 # \_ 3:0:3:50 sdz 65:144 [active][undef]
38 # An alias might not be defined. The output is then:
39 # 360a980004334644d654a364469555a76
40 # [size=300 GB][features="0"][hwhandler="0"]
41 # \_ round-robin 0 [active]
42 # \_ 1:0:2:13 sdc 8:32 [active][ready]
43 # \_ 3:0:2:13 sdl 8:176 [active][ready]
45 # Might also be local disks:
46 # SFUJITSU_MAW3073NC_DBL2P62003VT
47 # [size=68 GB][features="0"][hwhandler="0"]
48 # \_ round-robin 0 [active]
49 # \_ 0:0:3:0 sdb 8:16 [active][ready]
51 # Some very special header line
52 # (No space between uuid and dm-* - strange...)
53 # 360a980004334644d654a316e65306e51dm-4 NETAPP,LUN
54 # [size=30G][features=1 queue_if_no_path][hwhandler=0]
55 # \_ round-robin 0 [prio=0][active]
56 # \_ 1:0:2:50 sdg 8:96 [active][undef]
57 # \_ round-robin 0 [prio=0][enabled]
58 # \_ 4:0:1:50 sdl 8:176 [active][undef]
60 # And another one:
61 # 1494554000000000052303250303700000000000000000000 dm-0 IET,VIRTUAL-DISK
62 # [size=70G][features=0][hwhandler=0][rw]
63 # \_ round-robin 0 [prio=-1][active]
64 # \_ 3:0:0:0 sdb 8:16 [active][undef]
65 # \_ round-robin 0 [prio=-1][enabled]
66 # \_ 4:0:0:0 sdc 8:32 [active][undef]
68 # Other output from other host:
69 # anzvol2 (36005076306ffc648000000000000510a) dm-15 IBM,2107900
70 # [size=100G][features=0][hwhandler=0]
71 # \_ round-robin 0 [prio=-6][active]
72 # \_ 4:0:5:1 sdbf 67:144 [active][undef]
73 # \_ 4:0:4:1 sdau 66:224 [active][undef]
74 # \_ 4:0:3:1 sdaj 66:48 [active][undef]
75 # \_ 3:0:5:1 sdy 65:128 [active][undef]
76 # \_ 3:0:4:1 sdn 8:208 [active][undef]
77 # \_ 3:0:3:1 sdc 8:32 [active][undef]
78 # anzvol1 (36005076306ffc6480000000000005005) dm-16 IBM,2107900
79 # [size=165G][features=0][hwhandler=0]
80 # \_ round-robin 0 [prio=-6][active]
81 # \_ 4:0:5:0 sdbe 67:128 [active][undef]
82 # \_ 4:0:4:0 sdat 66:208 [active][undef]
83 # \_ 4:0:3:0 sdai 66:32 [active][undef]
84 # \_ 3:0:5:0 sdx 65:112 [active][undef]
85 # \_ 3:0:4:0 sdm 8:192 [active][undef]
86 # \_ 3:0:3:0 sdb 8:16 [active][undef]
88 # And one other output (ID has not 33 times A-Z0-9):
89 # mpath1 (SIBM_____SwapA__________DA02BF71)
90 # [size=41 GB][features="0"][hwhandler="0"]
91 # \_ round-robin 0 [active]
92 # \_ 1:0:1:0 sdd 8:48 [active]
94 # Recently I've seen this output >:-P
95 # 360080e500017bd72000002eb4c1b1ae8 dm-1 IBM,1814 FAStT
96 # size=350G features='1 queue_if_no_path' hwhandler='1 rdac' wp=rw
97 # `-+- policy='round-robin 0' prio=-1 status=active
98 # |- 7:0:2:81 sdd 8:48 active undef running
99 # `- 8:0:2:81 sdp 8:240 active undef running
101 # And this has been seen on SLES 11 SP1 64 Bit:
102 # 3600508b40006d7da0001a00004740000 dm-0 HP,HSV210
103 # size=10G features='1 queue_if_no_path' hwhandler='0' wp=rw
104 # |-+- policy='round-robin 0' prio=-1 status=active
105 # | |- 2:0:0:1 sda 8:0 active undef running
106 # | `- 3:0:0:1 sdo 8:224 active undef running
107 # `-+- policy='round-robin 0' prio=-1 status=enabled
108 # |- 3:0:1:1 sdv 65:80 active undef running
109 # `- 2:0:1:1 sdh 8:112 active undef running
111 # This is another output, which made problems up to
112 # 1.1.12:
114 # SDDN_S2A_9900_1308xxxxxxxx dm-13 DDN,S2A 9900
115 # [size=7.3T][features=0][hwhandler=0][rw]
116 # \_ round-robin 0 [prio=0][active]
117 # \_ 3:0:1:11 sdaj 66:48 [failed][undef]
118 # \_ 4:0:0:11 sdbh 67:176 [failed][undef]
119 # \_ 4:0:2:11 sddd 70:176 [active][undef]
120 # \_ 3:0:2:11 sdeb 128:48 [active][undef]
121 # \_ round-robin 0 [prio=0][enabled]
122 # \_ 4:0:1:11 sdcf 69:48 [active][undef]
123 # \_ 3:0:0:11 sdl 8:176 [active][undef]
125 # Just an underscore and a dash in the LUN name
126 # SDataCoreSANsymphony_DAT05-fscl dm-6 DataCore,SANsymphony
127 # [size=600G][features=0][hwhandler=0]
128 # \_ round-robin 0 [prio=-1][enabled]
129 # \_ 3:0:0:11 sdae 65:224 [active][undef]
130 # \_ round-robin 0 [prio=-1][enabled]
131 # \_ 4:0:0:11 sdt 65:48 [active][undef]
133 # This one here is from RedHat 6. Very creative...
134 # 1IET 00010001 dm-4 IET,VIRTUAL-DISK
135 # size=200G features='0' hwhandler='0' wp=rw
136 # |-+- policy='round-robin 0' prio=0 status=active
137 # | `- 23:0:0:1 sdk 8:160 active undef running
138 # |-+- policy='round-robin 0' prio=0 status=enabled
139 # | `- 21:0:0:1 sdj 8:144 active undef running
140 # |-+- policy='round-robin 0' prio=0 status=enabled
141 # | `- 22:0:0:1 sdg 8:96 active undef running
142 # `-+- policy='round-robin 0' prio=0 status=enabled
143 # `- 20:0:0:1 sdi 8:128 active undef running
145 # And a completely new situation:
146 # <<<multipath>>>
147 # Nov 05 17:17:03 | DM multipath kernel driver not loaded
148 # Nov 05 17:17:03 | /etc/multipath.conf does not exist, blacklisting all devices.
149 # Nov 05 17:17:03 | A sample multipath.conf file is located at
150 # Nov 05 17:17:03 | /usr/share/doc/device-mapper-multipath-0.4.9/multipath.conf
151 # Nov 05 17:17:03 | You can run /sbin/mpathconf to create or modify /etc/multipath.conf
152 # Nov 05 17:17:03 | DM multipath kernel driver not loaded
154 # UUID which includes dots (seen on Oracle Exadata VM)
155 # <<<multipath>>>
156 # iqn.2015-05.com.oracle:QD_DG_VOTE101_EXAO2ADM1VM101 dm-7 IET,VIRTUAL-DISK
157 # size=128M features='0' hwhandler='0' wp=rw
158 # |-+- policy='round-robin 0' prio=0 status=active
159 # | `- 8:0:0:1 sdg 8:96 active undef unknown
160 # |-+- policy='round-robin 0' prio=0 status=enabled
161 # | `- 9:0:0:1 sdh 8:112 active undef unknown
162 # |-+- policy='round-robin 0' prio=0 status=enabled
163 # | `- 10:0:0:1 sdi 8:128 active undef unknown
164 # `-+- policy='round-robin 0' prio=0 status=enabled
165 # `- 11:0:0:1 sdj 8:144 active undef unknown
def parse_multipath(info):
    """Parse the <<<multipath>>> agent section into one dict per device.

    info is the list of agent output lines, each line already split into
    words.

    Returns a dict that maps the UUID of every multipath device found to a
    dict with these keys:
      'uuid'          the UUID taken from the header line
      'device'        the dm-* device named in the header line, or None
      'alias'         the alias from the header line (key only present if
                      the header line carried one)
      'state'         the words from the fourth on of the last priority
                      group line seen, joined without separator, or None
      'paths'         the third word (block device name) of every path line
      'luns'          "<H:C:T:L>(<block device>)" for every path line
      'broken_paths'  those entries of 'luns' whose state column does not
                      contain "active"
      'numpaths'      number of path lines seen for the device

    Raises ValueError for a line that is neither a data row nor matches
    any of the known header formats.
    """
    # New reported header lines need to be placed here.
    # The matches are kept in a list of tuples, where each tuple is:
    #   0: matching regex
    #   1: matched regex-group id of UUID
    #   2: matched regex-group id of alias (optional)
    #   3: matched regex-group id of dm-device (optional)
    reg_headers = [
        (regex(r"^[0-9a-z]{33}$"), 0, None, None),  # 1. (should be included in 3.)
        (regex(r"^([^\s]+)\s\(([0-9A-Za-z_-]+)\)\s(dm.[0-9]+)"), 2, 1, 3),  # 2.
        (regex(r"^([^\s]+)\s\(([0-9A-Za-z_-]+)\)"), 2, 1, None),  # 2.
        (regex(r"^[a-zA-Z0-9_]+$"), 0, None, None),  # 3.
        (regex(r"^([0-9a-z]{33}|[0-9a-z]{49})\s?(dm.[0-9]+).*$"), 1, None, 2),  # 4.
        (regex(r"^[a-zA-Z0-9_]+(dm-[0-9]+).*$"), 0, None, 1),  # 5. Remove this line in 1.2.0
        (regex(r"^([-.a-zA-Z0-9_ :]+)\s?(dm-[0-9]+).*$"), 1, None, 2),  # 6. and 7.
    ]

    # "[prio=" or " prio=". The bracket is escaped because an unescaped "[["
    # is reported as a possible nested set by newer versions of the re module.
    reg_prio = regex(r"[\[ ]prio=")
    reg_lun = regex("[0-9]+:[0-9]+:[0-9]+:[0-9]+")

    # Messages printed when multipath is not present or not configured
    skip_suffixes = (
        'kernel driver not loaded',
        'does not exist, blacklisting all devices.',
        'A sample multipath.conf file is located at',
        'multipath.conf',
    )

    groups = {}
    # Device that the following data rows belong to. None while we are not
    # inside the output of a device.
    group = None

    for line in info:
        # Nothing to look at in an empty row
        if not line:
            continue

        # Ignore error messages due to invalid multipath.conf
        if line[0] == "multipath.conf":
            continue

        # Newer agents also output the device mapper table.
        # Ignore those lines for now: reset current device and skip line.
        if line[0] == "dm":
            group = None
            continue

        # restore original non-split line
        l = " ".join(line)

        # Skip output when multipath is not present
        if l.endswith(skip_suffixes):
            group = None
            continue

        # First simply separate between data row and header row
        if line[0][0] not in ('[', '`', '|', '\\') and not line[0].startswith("size="):
            # Try to match header lines, first matching format wins
            for header_regex, uuid_pos, alias_pos, dm_pos in reg_headers:
                matchobject = header_regex.search(l)
                if matchobject:
                    break
            else:
                # No data row and no matching header row
                raise ValueError("Invalid line in agent output: " + l)

            # initialize information about next device
            uuid = matchobject.group(uuid_pos).strip()
            group = {
                'paths': [],
                'broken_paths': [],
                'luns': [],
                'uuid': uuid,
                'state': None,
                'numpaths': 0,
                'device': matchobject.group(dm_pos) if dm_pos else None,
            }
            groups[uuid] = group

            # If the device has an alias, then extract it
            alias = matchobject.group(alias_pos) if alias_pos else None
            if alias:
                group['alias'] = alias

            # Proceed with next line after init
            continue

        # Data row without a preceding header: nothing to attach it to
        if group is None:
            continue

        # Handle special syntax | |- 2:0:0:1 sda ...
        if line[0] == '|':
            line = line[1:]

        if reg_prio.search(l):
            group['state'] = "".join(line[3:])
        elif len(line) >= 4 and reg_lun.match(line[1]):
            luninfo = "%s(%s)" % (line[1], line[2])
            group['luns'].append(luninfo)
            # A path row with only four words has no state column. It cannot
            # be confirmed as active then, so it is counted as broken
            # instead of failing with an IndexError.
            state = line[4] if len(line) > 4 else ""
            if "active" not in state:
                group['broken_paths'].append(luninfo)
            group['numpaths'] += 1
            group['paths'].append(line[2])

    return groups
279 # Get list of UUIDs of all multipath devices
280 # A UUID looks like this (33 characters): 360a9800043346937686f456f59386741
def inventory_multipath(parsed):
    """Return one (item, number of paths) pair per parsed multipath device.

    The item is the alias of the device if the device has one and the
    merged inventory_multipath_rules settings of this host enable
    "use_alias"; otherwise it is the UUID. The number of paths currently
    reported for the device is taken as the target value.
    """
    settings = host_extra_conf_merged(host_name(), inventory_multipath_rules)

    return [
        (
            device["alias"] if "alias" in device and settings.get("use_alias") else device_uuid,
            device['numpaths'],
        )
        for device_uuid, device in parsed.items()
    ]
295 # item is UUID (e.g. '360a9800043346937686f456f59386741') or alias (e.g. 'mpath0')
296 #def check_multipath(item, target_numpaths, parsed):
def check_multipath(item, params, parsed):
    """Check the number of active paths of one multipath device.

    item is the UUID (e.g. '360a9800043346937686f456f59386741') or the
    alias (e.g. 'mpath0') of the device. parsed maps UUIDs to the device
    dicts. params is either a (warn, crit) tuple of percentages of the
    total number of paths, or the expected number of active paths as an
    int; any other value means two paths are expected.

    Yields (state, text) pairs: the result for the active paths, followed
    by an informational line naming the broken paths if there are any.
    Yields a single (3, ...) result if the device is not found.
    """
    # Keys in parsed are the UUIDs, so try the item as UUID first. The
    # stripped variant supports items discovered before 1.2.7. Only then
    # fall back to searching for a matching alias.
    stripped_item = item.strip()
    if item in parsed:
        device = parsed[item]
    elif stripped_item in parsed:
        device = parsed[stripped_item]
    else:
        device = next(
            (candidate for candidate in parsed.values() if candidate.get("alias") == item),
            None,
        )
        if device is None:
            yield 3, "Multipath device not found in agent output"
            return

    # If the item is the alias, then show the UUID in the plugin output.
    # If the item is the UUID, then vice versa.
    alias = device.get('alias')
    uuid = device.get('uuid')
    other_name = None
    if item == uuid and alias:
        other_name = alias
    elif item == alias and uuid:
        other_name = uuid
    prefix = "(%s): " % other_name if other_name is not None else ""

    total = len(device['paths'])
    broken = device['broken_paths']
    active = total - len(broken)

    text = "%sPaths active: %s/%s" % (prefix, active, total)

    if isinstance(params, tuple):
        # Levels are percentages of the total number of paths
        warn, crit = params
        warn_limit = (warn / 100.0) * total
        crit_limit = (crit / 100.0) * total
        status = 2 if active < crit_limit else (1 if active < warn_limit else 0)
        if status > 0:
            text += " (warn/crit below %d/%d)" % (warn_limit, crit_limit)
    else:
        # Fixed number of expected paths, two if nothing usable is given
        expected = params if isinstance(params, int) else 2
        text += ", Expected paths: %s" % expected
        if active < expected:
            status = 2
            text += " (crit below %d)" % expected
        elif active > expected:
            status = 1
            text += " (warn at %d)" % expected
        else:
            status = 0

    yield status, text

    if broken:
        yield 0, "Broken paths: %s" % ",".join(broken)
370 check_info["multipath"] = {
371 'check_function': check_multipath,
372 'inventory_function': inventory_multipath,
373 'parse_function': parse_multipath,
374 'service_description': "Multipath %s",
375 'group': "multipath",