2 # -*- encoding: utf-8; py-indent-offset: 4 -*-
3 # +------------------------------------------------------------------+
4 # | ____ _ _ __ __ _ __ |
5 # | / ___| |__ ___ ___| | __ | \/ | |/ / |
6 # | | | | '_ \ / _ \/ __| |/ / | |\/| | ' / |
7 # | | |___| | | | __/ (__| < | | | | . \ |
8 # | \____|_| |_|\___|\___|_|\_\___|_| |_|_|\_\ |
10 # | Copyright Mathias Kettner 2014 mk@mathias-kettner.de |
11 # +------------------------------------------------------------------+
13 # This file is part of Check_MK.
14 # The official homepage is at http://mathias-kettner.de/check_mk.
16 # check_mk is free software; you can redistribute it and/or modify it
17 # under the terms of the GNU General Public License as published by
18 # the Free Software Foundation in version 2. check_mk is distributed
19 # in the hope that it will be useful, but WITHOUT ANY WARRANTY; with-
20 # out even the implied warranty of MERCHANTABILITY or FITNESS FOR A
21 # PARTICULAR PURPOSE. See the GNU General Public License for more de-
22 # tails. You should have received a copy of the GNU General Public
23 # License along with GNU Make; see the file COPYING. If not, write
24 # to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
25 # Boston, MA 02110-1301 USA.
27 # First generation of agents output only the process command line:
28 # /usr/sbin/xinetd -pidfile /var/run/xinetd.pid -stayalive -inetd_compat -inetd_ipv6
30 # Second generation of agents output the user in brackets in the first columns:
31 # (root) /usr/sbin/xinetd -pidfile /var/run/xinetd.pid -stayalive -inetd_compat -inetd_ipv6
33 # Third generation (from 1.1.5) output also virtual memory, resident memory and %CPU:
34 # (class,122376,88128,0.0) /usr/jre1.6.0_13/bin/java -Dn=Cart_16TH13 -Dmcs.node=zbgh1ca -Dmcs.mdt.redundan
36 # Fourth generation (>=1.2.5), additional columns in brackets:
37 # (user, virtual_size, resident_size, %cpu, processID, pagefile_usage, usermodetime, kernelmodetime, openHandles, threadCount) name
38 # (\\KLAPPRECHNER\ab,29284,2948,0,3124,904,400576,901296,35,1) NOTEPAD.EXE
40 # Sixth generation (>=1.2.7) adds an optional etime, joined by "/" with the CPU time
42 # The plugin "psperf.bat" is deprecated. As of version 1.2.5 all of this information
43 # is reported by the windows agent itself. However, we still support sections from psperf.bat
44 # if the agent version is lower than 1.2.5.
45 # Windows agent now ships a plugin "psperf.bat" that adds a section from wmic
50 # Node,KernelModeTime,Name,PageFileUsage,ThreadCount,UserModeTime,VirtualSize,WorkingSetSize^M
51 # WINDOWSXP,43478281250,System Idle Process,0,2,0,0,28672^M
52 # WINDOWSXP,155781250,System,0,59,0,1957888,253952^M
53 # WINDOWSXP,468750,smss.exe,176128,3,156250,3928064,442368^M
54 # WINDOWSXP,56406250,csrss.exe,1863680,12,11406250,25780224,3956736^M
55 # WINDOWSXP,18593750,winlogon.exe,6832128,19,4843750,59314176,2686976^M
56 # WINDOWSXP,167500000,services.exe,1765376,16,13750000,22601728,4444160^M
57 # WINDOWSXP,16875000,lsass.exe,3964928,21,3906250,43462656,6647808^M
58 # WINDOWSXP,8750000,VBoxService.exe,1056768,8,468750,26652672,3342336^M
60 # New since 1.2.1i2: WATO compatible syntax
62 # Holds a list of rules which are matching hosts by names or tags and
63 # where each rule holds a dictionary.
65 # Each of those entries defines the following options:
67 # 1. descr: item name to be used for the service description
68 # 2. match: matching-definition
69 # 3. user: user definition
70 # 4. perfdata: monitor with perfdata
71 # 5. levels: four numbers (thresholds)
# Ruleset evaluated by the process discovery; it is passed to
# inventory_ps_common() by inventory_ps(). Empty by default.
inventory_processes_rules = []

# Deprecated option since 1.6. cmk_base creates a config warning when finding rules
# for this ruleset. Can be dropped with 1.7.
inventory_processes = []

inventory_processes_perf = []
def ps_cleanup_counters(parsed):
    """Remove stored item states of processes that are no longer running.

    The process ids currently present in ``parsed`` are collected with
    ps_get_current_pids(); every counter ident reported by
    ps_get_counters_to_delete() for those pids is then cleared, together
    with one legacy key.
    """
    # remove legacy key used for some kind of caching
    cleanup_idents = ["last.cleared.ps_"]

    pids = ps_get_current_pids(parsed)
    cleanup_idents += ps_get_counters_to_delete(pids)

    # Keep the item state from growing with data about processes that are
    # not executing anymore: only processes contained in the current parsed
    # agent output keep their counters.
    clear_item_states_by_full_keys(cleanup_idents)
95 # Get the idents of the counters which can be deleted because the process id of
96 # the counter is not found anymore in the process table.
98 # Handle these formats of idents:
99 # Old string based keys: 'ps_stat.pcpu.669': (1448634267.875281, 1),
100 # New magic keys: ('ps', None, 'ps_wmic.kernel.692'): (1448633487.573496, 1092007),
def ps_get_counters_to_delete(pids):
    """Return the item state idents whose process id is not in ``pids``.

    Two ident formats are recognised:
      * string keys such as 'ps_stat.pcpu.669'
      * tuple keys such as ('ps', None, 'ps_wmic.kernel.692')

    The process id is the part after the last "." of the counter name. It is
    compared as a string, so ``pids`` must contain strings as well.
    """
    counters_to_delete = []
    for ident in get_all_item_states():
        if isinstance(ident, tuple) and ident[0] in ["ps", "ps.perf"]:
            check_ident = ident[2]
        elif not isinstance(ident, tuple) and ident.startswith(("ps_stat", "ps_wmic")):
            check_ident = ident
        else:
            # Not a counter of this check: leave it alone. Without this
            # branch check_ident would be unbound or stale from a previous
            # iteration.
            continue

        pid = check_ident.split(".")[-1]
        if pid.isdigit() and pid not in pids:
            counters_to_delete.append(ident)

    return counters_to_delete
def ps_get_current_pids(parsed):
    """Collect the process ids found in ``parsed``.

    The second element of every entry must provide a ``process_id``
    attribute; entries with a falsy process id are ignored.

    Returns a set, which is what the membership test in
    ps_get_counters_to_delete() is applied to.
    """
    pids = set()
    for line in parsed:
        process_info = line[1]
        if process_info.process_id:
            pids.add(process_info.process_id)
    return pids
128 # This function is only concerned with deprecated output from psperf.bat,
129 # in case of all other output it just returns info unmodified. But if it is
130 # a windows output it will extract the number of cpu cores
# Determine the number of CPU cores and dispatch legacy wmic output.
# Visible behaviour: when a row has more than two columns and its third
# column equals "system idle process" (case-insensitive), the tenth
# comma-separated field of the bracketed second column is returned as
# cpu_cores together with the unmodified info. Rows are also tested for
# "wmic process" in their last column; the final statement hands info to
# extract_wmic_info().
# NOTE(review): the embedded line numbers jump (133->136, 139->143->147), so
# the loop header and the statements guarded by the checks below are not
# visible here - confirm against the complete file.
131 def ps_merge_wmic_info(info
):
132 # Agent output version cmk>1.2.5
133 # Assumes line = [CLUSTER, PS_INFO, COMMAND]
# line[1][1:-1] strips the surrounding brackets of the process info column.
136 if len(line
) > 2 and line
[2].lower() == "system idle process":
137 cpu_cores
= int(line
[1][1:-1].split(",")[9])
138 return cpu_cores
, info
139 if "wmic process" in line
[-1]:
143 # Data from other systems than windows
147 # Data from windows with wmic info, cmk<1.2.5
148 return extract_wmic_info(info
)
# Split agent output into plain process lines and wmic rows.
# Visible behaviour: a line whose last column is '[wmic process]' is followed
# by the header line, from which wmic_headers is built ("node" replaces the
# first column). Rows are turned into dicts keyed by those headers and
# grouped in wmic_info under (node, Name). Other lines are appended to
# ps_result. The result of merge_wmic() is returned.
# NOTE(review): several statements are not visible here (embedded numbers
# jump 151->160, 164->167, 168->171, 179->181), including the
# initialisation of ps_result, lines and wmic_info - confirm against the
# complete file.
# NOTE(review): lines.next() is the Python 2 iterator method; Python 3 would
# need next(lines).
151 def extract_wmic_info(info
):
160 if line
[-1] == '[wmic process]':
162 wmic_headers
= ["node"] + lines
.next()[1:]
164 elif line
[-1] == '[wmic process end]':
167 except StopIteration:
168 break # Finished with all lines
171 row
= dict(zip(wmic_headers
, line
))
172 # Row might be damaged. I've seen this agent output:
173 # Node - TILE-BUILDER02
175 # Description = Quota violation
# Only rows that carry both a process name and a process id are kept.
178 if "Name" in row
and "ProcessId" in row
:
179 wmic_info
.setdefault((row
["node"], row
["Name"]), []).append(row
)
181 ps_result
.append(line
) # plain list of process names
183 return merge_wmic(ps_result
, wmic_info
, wmic_headers
)
# Combine the plain process list with the wmic rows collected per
# (node, process name).
# Visible behaviour: for each line of ps_result the matching wmic rows are
# looked up by (line[0], line[1]) and one row is popped, so every row is
# consumed at most once. The ThreadCount of the "system idle process" row is
# taken as cpu_cores. For process ids not seen before, the memory values are
# converted from bytes to KB and formatted into a bracketed process info
# string. cpu_cores and info are returned.
# NOTE(review): several statements are not visible here (embedded numbers
# jump 186->188, 191->193, 198->200, 206->208, 209->213), including the
# initialisation of info and cpu_cores and the use of the formatted string
# - confirm against the complete file.
186 def merge_wmic(ps_result
, wmic_info
, wmic_headers
):
188 seen_pids
= set([]) # Remove duplicate entries
190 for line
in ps_result
:
191 psinfos
= wmic_info
.get((line
[0], line
[1]), [])
193 psinfo
= psinfos
.pop() # each info is used only once!
194 # Get number of CPU cores from system idle process
195 if "ThreadCount" in wmic_headers
and psinfo
["Name"].lower() == "system idle process":
196 cpu_cores
= int(psinfo
["ThreadCount"])
197 pid
= int(psinfo
["ProcessId"])
198 if pid
not in seen_pids
:
# NOTE(review): "/" on two ints floors under Python 2 but yields a float
# under Python 3; the values are later formatted with %d.
200 virt
= int(psinfo
["VirtualSize"]) / 1024 # Bytes -> KB
201 resi
= int(psinfo
["WorkingSetSize"]) / 1024 # Bytes -> KB
202 pagefile
= int(psinfo
["PageFileUsage"]) / 1024 # Bytes -> KB
203 userc
= int(psinfo
["UserModeTime"]) # do not resolve counter here!
204 kernelc
= int(psinfo
["KernelModeTime"]) # do not resolve counter here!
205 handlec
= int(psinfo
.get("HandleCount", 0)) # Only in newer psperf.bat versions
206 threadc
= int(psinfo
["ThreadCount"]) # do not resolve counter here!
# The user is reported as "unknown" and the %cpu field as 0.
208 "(unknown,%d,%d,0,%d,%d,%d,%d,%d,%d,)" % (virt
, resi
, pid
, pagefile
, userc
,
209 kernelc
, handlec
, threadc
)
213 return cpu_cores
, info
216 # This mainly formats the line[1] element which contains the process info (user,...)
# Normalise the entries of parsed for discovery and check.
# Visible behaviour: line[1] is replaced by the result of ps_info_tuple(),
# or an empty ps_info() is inserted at index 1 so that all lines have the
# same number of columns. Lines without a non-empty third element (the
# process command line) are dropped afterwards.
# NOTE(review): the loop header, the conditions selecting between the two
# cases and the return statement are not visible here (embedded numbers
# jump 217->219, 219->221, 223->225, and nothing follows 231) - confirm
# against the complete file.
217 def ps_parse_process_entries(parsed
):
219 # line[1] = process_info OR (if no process info available) = process name
221 process_info
= ps_info_tuple(line
[1])
223 line
[1] = process_info
225 # Make number of columns in line consistent for discovery/check
226 line
.insert(1, ps_info())
228 # Filter out any lines where no process command line is available, e.g.
229 # [None, u'(<defunct>,,,)']
230 # [None, u'(<defunct>,,,)', u'']
231 parsed
= [x
for x
in parsed
if len(x
) > 2 and x
[2]]
235 # Produces a list of lists where each sub list is built as follows:
237 # [None, (u'root', u'35156', u'4372', u'00:00:05/2-14:14:49', u'1'), u'/sbin/init'],
239 # First element: The node the data comes from in a cluster or None
240 # Second element: The process info tuple (see ps.include: check_ps_common() for details on the elements)
241 # Third element: The process command line
243 cpu_cores
, parsed
= ps_merge_wmic_info(info
)
245 parsed
= ps_parse_process_entries(parsed
)
247 # Cleanup counters of processes which do not exist anymore
248 ps_cleanup_counters(parsed
)
250 return cpu_cores
, parsed
def inventory_ps(info):
    """Discover process services from the parsed ps section.

    ``info[0]`` is unpacked as a ``(cpu_cores, parsed)`` pair. Only
    ``parsed`` is needed here; it is handed to inventory_ps_common() together
    with the inventory_processes_rules ruleset.
    """
    _cpu_cores, parsed = info[0]
    return inventory_ps_common(inventory_processes_rules, parsed)
# Check function of the ps check.
# Visible behaviour: info is unpacked into the parsed ps data followed by
# mem_info, solaris_mem_info, statgrab_mem_info, aix_memory_info and
# cpu_info. If the first cpu_info line has six columns, its sixth column
# overrides cpu_cores. total_ram is taken from the first memory section that
# is available, and the result of check_ps_common() is returned.
# NOTE(review): parts of this function are not visible here (embedded
# numbers jump 258->260, 264->267, 273->277): the start of the tuple
# unpacking, the head of the if/elif chain and whatever handles the case
# that no memory section is present - confirm against the complete file.
258 def check_ps(item
, params
, info
):
260 parsed
), mem_info
, solaris_mem_info
, statgrab_mem_info
, aix_memory_info
, cpu_info
= info
262 # cpu_info for non windows systems
263 if cpu_info
and len(cpu_info
[0]) == 6:
264 cpu_cores
= int(cpu_info
[0][5])
267 total_ram
= parse_proc_meminfo_bytes(mem_info
).get("MemTotal")
268 elif solaris_mem_info
:
# NOTE(review): .get("MemTotal") yields None for a missing key, which would
# make the multiplication raise TypeError - verify the key is always set.
269 total_ram
= solaris_mem_info
.get("MemTotal") * 1024
270 elif statgrab_mem_info
:
271 total_ram
= statgrab_mem_info
.get("MemTotal") * 1024
272 elif aix_memory_info
:
# presumably the value counts 4 KB pages, hence * 4 * 1024 - TODO confirm
273 total_ram
= int(aix_memory_info
[0][0]) * 4 * 1024
277 return check_ps_common(item
, params
, parsed
, cpu_cores
=cpu_cores
, total_ram
=total_ram
)
281 "parse_function": parse_ps
,
282 "inventory_function": inventory_ps
,
283 "check_function": check_ps
,
284 "service_description": "Process %s",
285 "includes": ["ps.include", "mem.include"],
286 "has_perfdata": True,
287 "node_info": True, # add first column with actual host name
289 "default_levels_variable": "ps_default_levels",
290 "extra_sections": ["mem", "solaris_mem", "statgrab_mem", "aix_memory", "cpu"],
293 # NOTE: This check is deprecated and will be removed any decade now. ps now
294 # always produces performance data.
295 check_info
['ps.perf'] = {
296 "check_function": check_ps
,
297 "service_description": "Process %s",
298 "includes": ["ps.include", "mem.include"],
299 "has_perfdata": True,
300 "node_info": True, # add first column with actual host name
302 "default_levels_variable": "ps_default_levels",
303 "extra_sections": ["mem", "solaris_mem", "statgrab_mem", "aix_memory", "cpu"],