1 /*****************************************************************************
3 * Monitoring check_procs plugin
6 * Copyright (c) 2000-2008 Monitoring Plugins Development Team
10 * This file contains the check_procs plugin
12 * Checks all processes and generates WARNING or CRITICAL states if the
13 * specified metric is outside the required threshold ranges. The metric
14 * defaults to number of processes. Search filters can be applied to limit
15 * the processes to check.
18 * This program is free software: you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation, either version 3 of the License, or
21 * (at your option) any later version.
23 * This program is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 * GNU General Public License for more details.
28 * You should have received a copy of the GNU General Public License
29 * along with this program. If not, see <http://www.gnu.org/licenses/>.
32 *****************************************************************************/
/* Identity strings for this plugin. progname is handed to np_extra_opts()
 * and print_revision() and is printed in the usage line; copyright and email
 * are substituted into the COPYRIGHT banner of the help output. */
34 const char *progname
= "check_procs";
35 const char *program_name
= "check_procs"; /* Required for coreutils libs */
36 const char *copyright
= "2000-2008";
37 const char *email
= "devel@monitoring-plugins.org";
41 #include "utils_cmd.h"
47 #ifdef HAVE_SYS_STAT_H
51 int process_arguments (int, char **);
52 int validate_arguments (void);
53 int convert_to_seconds (char *);
54 void print_help (void);
55 void print_usage (void);
/* Raw warning/critical range strings as given by the user (option argument
 * or leading positional argument); NULL when not supplied. */
57 char *warning_range
= NULL
;
58 char *critical_range
= NULL
;
/* Parsed form of the two ranges above, filled in by set_thresholds() at the
 * end of argument processing and passed to get_status(). */
59 thresholds
*procs_thresholds
= NULL
;
/* Each filter option ORs its flag in here; a process matches when the flags
 * it satisfies (resultsum) equal this mask. */
61 int options
= 0; /* bitmask of filter criteria to test against */
72 #define EREG_ARGS 1024
74 #define KTHREAD_PARENT "kthreadd" /* the parent process of kernel threads:
75 ppid of procs are compared to pid of this proc*/
77 /* Different metrics */
/* Metric the thresholds are applied to; chosen with the metric option
 * (PROCS, VSZ, RSS, CPU or ELAPSED) and defaulting to the process count. */
86 enum metric metric
= METRIC_PROCS
;
/* When NULL the process list comes from running PS_COMMAND via cmd_run();
 * otherwise it is read from this file with cmd_file_read(). */
97 char *input_filename
= NULL
;
/* Scratch buffer receiving any trailing non-numeric text when numeric option
 * arguments are validated with sscanf(). */
101 char tmp
[MAX_INPUT_BUFFER
];
/* Kernel threads (processes whose ppid is the pid of KTHREAD_PARENT) are
 * ignored when this is 1. */
102 int kthread_filter
= 0;
103 int usepid
= 0; /* whether to test for pid or /proc/pid/exe */
/* NOTE(review): no use of ps_input is visible in this listing - confirm
 * whether it is still needed. */
105 FILE *ps_input
= NULL
;
108 stat_exe (const pid_t pid
, struct stat
*buf
) {
111 xasprintf(&path
, "/proc/%d/exe", pid
);
112 ret
= stat(path
, buf
);
119 main (int argc
, char **argv
)
133 pid_t kthread_ppid
= 0;
139 char procetime
[MAX_INPUT_BUFFER
] = { '\0' };
142 const char *zombie
= "Z";
144 int resultsum
= 0; /* bitmask of the filter criteria met by a process */
145 int found
= 0; /* counter for number of lines returned in `ps` output */
146 int procs
= 0; /* counter for number of processes meeting filter criteria */
147 int pos
; /* number of spaces before 'args' in `ps` output */
148 int cols
; /* number of columns in ps output */
149 int expected_cols
= PS_COLS
- 1;
150 int warn
= 0; /* number of processes in warn state */
151 int crit
= 0; /* number of processes in crit state */
153 int result
= STATE_UNKNOWN
;
155 output chld_out
, chld_err
;
157 setlocale (LC_ALL
, "");
158 bindtextdomain (PACKAGE
, LOCALEDIR
);
159 textdomain (PACKAGE
);
160 setlocale(LC_NUMERIC
, "POSIX");
162 input_buffer
= malloc (MAX_INPUT_BUFFER
);
163 procprog
= malloc (MAX_INPUT_BUFFER
);
165 xasprintf (&metric_name
, "PROCS");
166 metric
= METRIC_PROCS
;
168 /* Parse extra opts if any */
169 argv
=np_extra_opts (&argc
, argv
, progname
);
171 if (process_arguments (argc
, argv
) == ERROR
)
172 usage4 (_("Could not parse arguments"));
177 if (usepid
|| stat_exe(mypid
, &statbuf
) == -1) {
178 /* usepid might have been set by -T */
182 mydev
= statbuf
.st_dev
;
183 myino
= statbuf
.st_ino
;
186 /* Set signal handling and alarm timeout */
187 if (signal (SIGALRM
, timeout_alarm_handler
) == SIG_ERR
) {
188 die (STATE_UNKNOWN
, _("Cannot catch SIGALRM"));
190 (void) alarm ((unsigned) timeout_interval
);
193 printf (_("CMD: %s\n"), PS_COMMAND
);
195 if (input_filename
== NULL
) {
196 result
= cmd_run( PS_COMMAND
, &chld_out
, &chld_err
, 0);
197 if (chld_err
.lines
> 0) {
198 printf ("%s: %s", _("System call sent warnings to stderr"), chld_err
.line
[0]);
202 result
= cmd_file_read( input_filename
, &chld_out
, 0);
205 /* flush first line: j starts at 1 */
206 for (j
= 1; j
< chld_out
.lines
; j
++) {
207 input_line
= chld_out
.line
[j
];
210 printf ("%s", input_line
);
212 strcpy (procprog
, "");
213 xasprintf (&procargs
, "%s", "");
215 cols
= sscanf (input_line
, PS_FORMAT
, PS_VARLIST
);
217 /* Zombie processes do not give a procprog command */
218 if ( cols
< expected_cols
&& strstr(procstat
, zombie
) ) {
219 cols
= expected_cols
;
221 if ( cols
>= expected_cols
) {
223 xasprintf (&procargs
, "%s", input_line
+ pos
);
226 /* Some ps return full pathname for command. This removes path */
227 strcpy(procprog
, base_name(procprog
));
229 /* we need to convert the elapsed time to seconds */
230 procseconds
= convert_to_seconds(procetime
);
233 printf ("proc#=%d uid=%d vsz=%d rss=%d pid=%d ppid=%d pcpu=%.2f stat=%s etime=%s prog=%s args=%s\n",
234 procs
, procuid
, procvsz
, procrss
,
235 procpid
, procppid
, procpcpu
, procstat
,
236 procetime
, procprog
, procargs
);
239 if ((usepid
&& mypid
== procpid
) ||
240 (!usepid
&& ((ret
= stat_exe(procpid
, &statbuf
) != -1) && statbuf
.st_dev
== mydev
&& statbuf
.st_ino
== myino
) ||
241 (ret
== -1 && errno
== ENOENT
))) {
243 printf("not considering - is myself or gone\n");
247 else if (myppid
== procpid
) {
249 printf("not considering - is parent\n");
253 /* filter kernel threads (children of KTHREAD_PARENT) */
254 /* TODO adapt for other OSes than GNU/Linux
255 sorry for not doing that, but I've no other OSes to test :-( */
256 if (kthread_filter
== 1) {
257 /* get pid KTHREAD_PARENT */
258 if (kthread_ppid
== 0 && !strcmp(procprog
, KTHREAD_PARENT
) )
259 kthread_ppid
= procpid
;
261 if (kthread_ppid
== procppid
) {
263 printf ("Ignore kernel thread: pid=%d ppid=%d prog=%s args=%s\n", procpid
, procppid
, procprog
, procargs
);
268 if ((options
& STAT
) && (strstr (statopts
, procstat
)))
270 if ((options
& ARGS
) && procargs
&& (strstr (procargs
, args
) != NULL
))
272 if ((options
& EREG_ARGS
) && procargs
&& (regexec(&re_args
, procargs
, (size_t) 0, NULL
, 0) == 0))
273 resultsum
|= EREG_ARGS
;
274 if ((options
& PROG
) && procprog
&& (strcmp (prog
, procprog
) == 0))
276 if ((options
& PPID
) && (procppid
== ppid
))
278 if ((options
& USER
) && (procuid
== uid
))
280 if ((options
& VSZ
) && (procvsz
>= vsz
))
282 if ((options
& RSS
) && (procrss
>= rss
))
284 if ((options
& PCPU
) && (procpcpu
>= pcpu
))
289 /* Next line if filters not matched */
290 if (!(options
== resultsum
|| options
== ALL
))
295 printf ("Matched: uid=%d vsz=%d rss=%d pid=%d ppid=%d pcpu=%.2f stat=%s etime=%s prog=%s args=%s\n",
296 procuid
, procvsz
, procrss
,
297 procpid
, procppid
, procpcpu
, procstat
,
298 procetime
, procprog
, procargs
);
301 if (metric
== METRIC_VSZ
)
302 i
= get_status ((double)procvsz
, procs_thresholds
);
303 else if (metric
== METRIC_RSS
)
304 i
= get_status ((double)procrss
, procs_thresholds
);
305 /* TODO? float thresholds for --metric=CPU */
306 else if (metric
== METRIC_CPU
)
307 i
= get_status (procpcpu
, procs_thresholds
);
308 else if (metric
== METRIC_ELAPSED
)
309 i
= get_status ((double)procseconds
, procs_thresholds
);
311 if (metric
!= METRIC_PROCS
) {
312 if (i
== STATE_WARNING
) {
314 xasprintf (&fails
, "%s%s%s", fails
, (strcmp(fails
,"") ? ", " : ""), procprog
);
315 result
= max_state (result
, i
);
317 if (i
== STATE_CRITICAL
) {
319 xasprintf (&fails
, "%s%s%s", fails
, (strcmp(fails
,"") ? ", " : ""), procprog
);
320 result
= max_state (result
, i
);
324 /* This should not happen */
326 printf(_("Not parseable: %s"), input_buffer
);
330 if (found
== 0) { /* no process lines parsed so return STATE_UNKNOWN */
331 printf (_("Unable to read output\n"));
332 return STATE_UNKNOWN
;
335 if ( result
== STATE_UNKNOWN
)
338 /* Needed if procs found, but none match filter */
339 if ( metric
== METRIC_PROCS
) {
340 result
= max_state (result
, get_status ((double)procs
, procs_thresholds
) );
343 if ( result
== STATE_OK
) {
344 printf ("%s %s: ", metric_name
, _("OK"));
345 } else if (result
== STATE_WARNING
) {
346 printf ("%s %s: ", metric_name
, _("WARNING"));
347 if ( metric
!= METRIC_PROCS
) {
348 printf (_("%d warn out of "), warn
);
350 } else if (result
== STATE_CRITICAL
) {
351 printf ("%s %s: ", metric_name
, _("CRITICAL"));
352 if (metric
!= METRIC_PROCS
) {
353 printf (_("%d crit, %d warn out of "), crit
, warn
);
356 printf (ngettext ("%d process", "%d processes", (unsigned long) procs
), procs
);
358 if (strcmp(fmt
,"") != 0) {
359 printf (_(" with %s"), fmt
);
362 if ( verbose
>= 1 && strcmp(fails
,"") )
363 printf (" [%s]", fails
);
365 if (metric
== METRIC_PROCS
)
366 printf (" | procs=%d;%s;%s;0;", procs
,
367 warning_range
? warning_range
: "",
368 critical_range
? critical_range
: "");
370 printf (" | procs=%d;;;0; procs_warn=%d;;;0; procs_crit=%d;;;0;", procs
, warn
, crit
);
378 /* process command-line arguments */
380 process_arguments (int argc
, char **argv
)
387 int cflags
= REG_NOSUB
| REG_EXTENDED
;
388 char errbuf
[MAX_INPUT_BUFFER
];
391 static struct option longopts
[] = {
392 {"warning", required_argument
, 0, 'w'},
393 {"critical", required_argument
, 0, 'c'},
394 {"metric", required_argument
, 0, 'm'},
395 {"timeout", required_argument
, 0, 't'},
396 {"status", required_argument
, 0, 's'},
397 {"ppid", required_argument
, 0, 'p'},
398 {"user", required_argument
, 0, 'u'},
399 {"command", required_argument
, 0, 'C'},
400 {"vsz", required_argument
, 0, 'z'},
401 {"rss", required_argument
, 0, 'r'},
402 {"pcpu", required_argument
, 0, 'P'},
403 {"elapsed", required_argument
, 0, 'e'},
404 {"argument-array", required_argument
, 0, 'a'},
405 {"help", no_argument
, 0, 'h'},
406 {"version", no_argument
, 0, 'V'},
407 {"verbose", no_argument
, 0, 'v'},
408 {"ereg-argument-array", required_argument
, 0, CHAR_MAX
+1},
409 {"input-file", required_argument
, 0, CHAR_MAX
+2},
410 {"no-kthreads", required_argument
, 0, 'k'},
411 {"traditional-filter", no_argument
, 0, 'T'},
415 for (c
= 1; c
< argc
; c
++)
416 if (strcmp ("-to", argv
[c
]) == 0)
417 strcpy (argv
[c
], "-t");
420 c
= getopt_long (argc
, argv
, "Vvhkt:c:w:p:s:u:C:a:z:r:m:P:T",
423 if (c
== -1 || c
== EOF
)
431 exit (STATE_UNKNOWN
);
432 case 'V': /* version */
433 print_revision (progname
, NP_VERSION
);
434 exit (STATE_UNKNOWN
);
435 case 't': /* timeout period */
436 if (!is_integer (optarg
))
437 usage2 (_("Timeout interval must be a positive integer"), optarg
);
439 timeout_interval
= atoi (optarg
);
441 case 'c': /* critical threshold */
442 critical_range
= optarg
;
444 case 'w': /* warning threshold */
445 warning_range
= optarg
;
447 case 'p': /* parent process id (PPID) to filter on */
448 if (sscanf (optarg
, "%d%[^0-9]", &ppid
, tmp
) == 1) {
449 xasprintf (&fmt
, "%s%sPPID = %d", (fmt
? fmt
: "") , (options
? ", " : ""), ppid
);
453 usage4 (_("Parent Process ID must be an integer!"));
454 case 's': /* status */
459 xasprintf (&fmt
, _("%s%sSTATE = %s"), (fmt
? fmt
: ""), (options
? ", " : ""), statopts
);
462 case 'u': /* user or user id */
463 if (is_integer (optarg
)) {
465 pw
= getpwuid ((uid_t
) uid
);
466 /* check to be sure user exists */
468 usage2 (_("UID was not found"), optarg
);
471 pw
= getpwnam (optarg
);
472 /* check to be sure user exists */
474 usage2 (_("User name was not found"), optarg
);
479 xasprintf (&fmt
, "%s%sUID = %d (%s)", (fmt
? fmt
: ""), (options
? ", " : ""),
483 case 'C': /* command */
484 /* TODO: allow this to be passed in with --metric */
489 xasprintf (&fmt
, _("%s%scommand name '%s'"), (fmt
? fmt
: ""), (options
? ", " : ""),
493 case 'a': /* args (full path name with args) */
494 /* TODO: allow this to be passed in with --metric */
499 xasprintf (&fmt
, "%s%sargs '%s'", (fmt
? fmt
: ""), (options
? ", " : ""), args
);
503 err
= regcomp(&re_args
, optarg
, cflags
);
505 regerror (err
, &re_args
, errbuf
, MAX_INPUT_BUFFER
);
506 die (STATE_UNKNOWN
, "PROCS %s: %s - %s\n", _("UNKNOWN"), _("Could not compile regular expression"), errbuf
);
508 /* Strip off any | within the regex optarg */
509 temp_string
= strdup(optarg
);
510 while(temp_string
[i
]!='\0'){
511 if(temp_string
[i
]=='|')
515 xasprintf (&fmt
, "%s%sregex args '%s'", (fmt
? fmt
: ""), (options
? ", " : ""), temp_string
);
516 options
|= EREG_ARGS
;
519 if (sscanf (optarg
, "%d%[^0-9]", &rss
, tmp
) == 1) {
520 xasprintf (&fmt
, "%s%sRSS >= %d", (fmt
? fmt
: ""), (options
? ", " : ""), rss
);
524 usage4 (_("RSS must be an integer!"));
526 if (sscanf (optarg
, "%d%[^0-9]", &vsz
, tmp
) == 1) {
527 xasprintf (&fmt
, "%s%sVSZ >= %d", (fmt
? fmt
: ""), (options
? ", " : ""), vsz
);
531 usage4 (_("VSZ must be an integer!"));
533 /* TODO: -P 1.5.5 is accepted */
534 if (sscanf (optarg
, "%f%[^0-9.]", &pcpu
, tmp
) == 1) {
535 xasprintf (&fmt
, "%s%sPCPU >= %.2f", (fmt
? fmt
: ""), (options
? ", " : ""), pcpu
);
539 usage4 (_("PCPU must be a float!"));
541 xasprintf (&metric_name
, "%s", optarg
);
542 if ( strcmp(optarg
, "PROCS") == 0) {
543 metric
= METRIC_PROCS
;
546 else if ( strcmp(optarg
, "VSZ") == 0) {
550 else if ( strcmp(optarg
, "RSS") == 0 ) {
554 else if ( strcmp(optarg
, "CPU") == 0 ) {
558 else if ( strcmp(optarg
, "ELAPSED") == 0) {
559 metric
= METRIC_ELAPSED
;
563 usage4 (_("Metric must be one of PROCS, VSZ, RSS, CPU, ELAPSED!"));
564 case 'k': /* linux kernel thread filter */
567 case 'v': /* verbose */
574 input_filename
= optarg
;
580 if ((! warning_range
) && argv
[c
])
581 warning_range
= argv
[c
++];
582 if ((! critical_range
) && argv
[c
])
583 critical_range
= argv
[c
++];
584 if (statopts
== NULL
&& argv
[c
]) {
585 xasprintf (&statopts
, "%s", argv
[c
++]);
586 xasprintf (&fmt
, _("%s%sSTATE = %s"), (fmt
? fmt
: ""), (options
? ", " : ""), statopts
);
590 /* this will abort in case of invalid ranges */
591 set_thresholds (&procs_thresholds
, warning_range
, critical_range
);
593 return validate_arguments ();
599 validate_arguments ()
605 statopts
= strdup("");
623 /* convert the elapsed time to seconds */
625 convert_to_seconds(char *etime
) {
644 for (ptr
= etime
; *ptr
!= '\0'; ptr
++) {
657 sscanf(etime
, "%d-%d:%d:%d",
658 &days
, &hours
, &minutes
, &seconds
);
659 /* linux 2.6.5/2.6.6 reporting some processes with infinite
660 * elapsed times for some reason */
666 sscanf(etime
, "%d:%d:%d",
667 &hours
, &minutes
, &seconds
);
668 } else if (coloncnt
== 1) {
669 sscanf(etime
, "%d:%d",
674 total
= (days
* 86400) +
679 if (verbose
>= 3 && metric
== METRIC_ELAPSED
) {
680 printf("seconds: %d\n", total
);
689 print_revision (progname
, NP_VERSION
);
691 printf ("Copyright (c) 1999 Ethan Galstad <nagios@nagios.org>\n");
692 printf (COPYRIGHT
, copyright
, email
);
694 printf ("%s\n", _("Checks all processes and generates WARNING or CRITICAL states if the specified"));
695 printf ("%s\n", _("metric is outside the required threshold ranges. The metric defaults to number"));
696 printf ("%s\n", _("of processes. Search filters can be applied to limit the processes to check."));
702 printf (UT_HELP_VRSN
);
703 printf (UT_EXTRA_OPTS
);
704 printf (" %s\n", "-w, --warning=RANGE");
705 printf (" %s\n", _("Generate warning state if metric is outside this range"));
706 printf (" %s\n", "-c, --critical=RANGE");
707 printf (" %s\n", _("Generate critical state if metric is outside this range"));
708 printf (" %s\n", "-m, --metric=TYPE");
709 printf (" %s\n", _("Check thresholds against metric. Valid types:"));
710 printf (" %s\n", _("PROCS - number of processes (default)"));
711 printf (" %s\n", _("VSZ - virtual memory size"));
712 printf (" %s\n", _("RSS - resident set memory size"));
713 printf (" %s\n", _("CPU - percentage CPU"));
714 /* only Linux etime is supported currently */
715 #if defined( __linux__ )
716 printf (" %s\n", _("ELAPSED - time elapsed in seconds"));
717 #endif /* defined(__linux__) */
718 printf (UT_PLUG_TIMEOUT
, DEFAULT_SOCKET_TIMEOUT
);
720 printf (" %s\n", "-v, --verbose");
721 printf (" %s\n", _("Extra information. Up to 3 verbosity levels"));
723 printf (" %s\n", "-T, --traditional");
724 printf (" %s\n", _("Filter own process the traditional way by PID instead of /proc/pid/exe"));
727 printf ("%s\n", "Filters:");
728 printf (" %s\n", "-s, --state=STATUSFLAGS");
729 printf (" %s\n", _("Only scan for processes that have, in the output of `ps`, one or"));
730 printf (" %s\n", _("more of the status flags you specify (for example R, Z, S, RS,"));
731 printf (" %s\n", _("RSZDT, plus others based on the output of your 'ps' command)."));
732 printf (" %s\n", "-p, --ppid=PPID");
733 printf (" %s\n", _("Only scan for children of the parent process ID indicated."));
734 printf (" %s\n", "-z, --vsz=VSZ");
735 printf (" %s\n", _("Only scan for processes with VSZ higher than indicated."));
736 printf (" %s\n", "-r, --rss=RSS");
737 printf (" %s\n", _("Only scan for processes with RSS higher than indicated."));
738 printf (" %s\n", "-P, --pcpu=PCPU");
739 printf (" %s\n", _("Only scan for processes with PCPU higher than indicated."));
740 printf (" %s\n", "-u, --user=USER");
741 printf (" %s\n", _("Only scan for processes with user name or ID indicated."));
742 printf (" %s\n", "-a, --argument-array=STRING");
743 printf (" %s\n", _("Only scan for processes with args that contain STRING."));
744 printf (" %s\n", "--ereg-argument-array=STRING");
745 printf (" %s\n", _("Only scan for processes with args that contain the regex STRING."));
746 printf (" %s\n", "-C, --command=COMMAND");
747 printf (" %s\n", _("Only scan for exact matches of COMMAND (without path)."));
748 printf (" %s\n", "-k, --no-kthreads");
749 printf (" %s\n", _("Only scan for non kernel threads (works on Linux only)."));
752 RANGEs are specified 'min:max' or 'min:' or ':max' (or 'max'). If\n\
753 specified 'max:min', a warning status will be generated if the\n\
754 count is inside the specified range\n\n"));
757 This plugin checks the number of currently running processes and\n\
758 generates WARNING or CRITICAL states if the process count is outside\n\
759 the specified threshold ranges. The process count can be filtered by\n\
760 process owner, parent process PID, current state (e.g., 'Z'), or may\n\
761 be the total number of running processes\n\n"));
763 printf ("%s\n", _("Examples:"));
764 printf (" %s\n", "check_procs -w 2:2 -c 2:1024 -C portsentry");
765 printf (" %s\n", _("Warning if not two processes with command name portsentry."));
766 printf (" %s\n\n", _("Critical if < 2 or > 1024 processes"));
767 printf (" %s\n", "check_procs -w 10 -a '/usr/local/bin/perl' -u root");
768 printf (" %s\n", _("Warning alert if > 10 processes with command arguments containing"));
769 printf (" %s\n\n", _("'/usr/local/bin/perl' and owned by root"));
770 printf (" %s\n", "check_procs -w 50000 -c 100000 --metric=VSZ");
771 printf (" %s\n\n", _("Alert if VSZ of any processes over 50K or 100K"));
772 printf (" %s\n", "check_procs -w 10 -c 20 --metric=CPU");
773 printf (" %s\n", _("Alert if CPU of any processes over 10%% or 20%%"));
781 printf ("%s\n", _("Usage:"));
782 printf ("%s -w <range> -c <range> [-m metric] [-s state] [-p ppid]\n", progname
);
783 printf (" [-u user] [-r rss] [-z vsz] [-P %%cpu] [-a argument-array]\n");
784 printf (" [-C command] [-k] [-t timeout] [-v]\n");