1 /*****************************************************************************
3 * Nagios check_procs plugin
6 * Copyright (c) 2000-2008 Nagios Plugins Development Team
10 * This file contains the check_procs plugin
12 * Checks all processes and generates WARNING or CRITICAL states if the
13 * specified metric is outside the required threshold ranges. The metric
14 * defaults to number of processes. Search filters can be applied to limit
15 * the processes to check.
18 * This program is free software: you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation, either version 3 of the License, or
21 * (at your option) any later version.
23 * This program is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 * GNU General Public License for more details.
28 * You should have received a copy of the GNU General Public License
29 * along with this program. If not, see <http://www.gnu.org/licenses/>.
32 *****************************************************************************/
/* Plugin identity strings. `copyright` and `email` are passed to the
 * COPYRIGHT format string when the help text is printed. */
34 const char *progname
= "check_procs";
35 const char *program_name
= "check_procs"; /* Required for coreutils libs */
36 const char *copyright
= "2000-2008";
37 const char *email
= "nagiosplug-devel@lists.sourceforge.net";
41 #include "utils_cmd.h"
/* Forward declarations of the functions defined later in this file. */
46 int process_arguments (int, char **);
47 int validate_arguments (void);
48 int check_thresholds (int);
49 int convert_to_seconds (char *);
50 void print_help (void);
51 void print_usage (void);
58 int options
= 0; /* bitmask of filter criteria to test against */
/* Filter bit for the regex argument filter: OR-ed into `options` when the
 * regex is compiled and into `resultsum` when a process' args match it. */
69 #define EREG_ARGS 1024
70 /* Different metrics */
/* Metric checked against the thresholds; defaults to the process count. */
79 enum metric metric
= METRIC_PROCS
;
/* Set from an option argument in process_arguments. When it is NULL, main
 * runs PS_COMMAND; main also passes it to cmd_file_read. */
90 char *input_filename
= NULL
;
/* Scratch buffer: receives any trailing non-numeric text in the
 * sscanf-based numeric option checks ("%d%[^0-9]" and similar). */
94 char tmp
[MAX_INPUT_BUFFER
];
96 FILE *ps_input
= NULL
;
/*
 * Entry point of the check_procs plugin.
 *
 * Visible flow: set up localisation, parse the command line, arm a SIGALRM
 * timeout, obtain process-listing lines (from PS_COMMAND, or from
 * input_filename via cmd_file_read), parse each line after the first with
 * sscanf, apply the filters selected in `options`, evaluate the chosen
 * metric against the thresholds, and print one status line.
 *
 * Returns STATE_UNKNOWN when no line could be parsed (found == 0); the final
 * return statement is not visible in this excerpt.
 */
100 main (int argc
, char **argv
)
115 char procetime
[MAX_INPUT_BUFFER
] = { '\0' };
118 const char *zombie
= "Z";
120 int resultsum
= 0; /* bitmask of the filter criteria met by a process */
121 int found
= 0; /* counter for number of lines returned in `ps` output */
122 int procs
= 0; /* counter for number of processes meeting filter criteria */
123 int pos
; /* number of spaces before 'args' in `ps` output */
124 int cols
; /* number of columns in ps output */
125 int expected_cols
= PS_COLS
- 1;
126 int warn
= 0; /* number of processes in warn state */
127 int crit
= 0; /* number of processes in crit state */
129 int result
= STATE_UNKNOWN
;
130 output chld_out
, chld_err
;
/* Apply the environment's locale for messages, then override LC_NUMERIC
 * with "POSIX" (presumably so that floats in the ps output parse with a
 * '.' decimal separator -- confirm). */
132 setlocale (LC_ALL
, "");
133 bindtextdomain (PACKAGE
, LOCALEDIR
);
134 textdomain (PACKAGE
);
135 setlocale(LC_NUMERIC
, "POSIX");
/* NOTE(review): neither malloc result is checked in the visible lines. */
137 input_buffer
= malloc (MAX_INPUT_BUFFER
);
138 procprog
= malloc (MAX_INPUT_BUFFER
);
/* Defaults; process_arguments may replace both when a metric is given. */
140 asprintf (&metric_name
, "PROCS");
141 metric
= METRIC_PROCS
;
143 /* Parse extra opts if any */
144 argv
=np_extra_opts (&argc
, argv
, progname
);
146 if (process_arguments (argc
, argv
) == ERROR
)
147 usage4 (_("Could not parse arguments"));
152 /* Set signal handling and alarm timeout */
153 if (signal (SIGALRM
, timeout_alarm_handler
) == SIG_ERR
) {
154 die (STATE_UNKNOWN
, _("Cannot catch SIGALRM"));
156 (void) alarm ((unsigned) timeout_interval
);
159 printf (_("CMD: %s\n"), PS_COMMAND
);
/* No input file given: run the ps command and capture stdout/stderr.
 * The cmd_file_read call below is presumably the else branch (the `else`
 * line itself is not visible in this excerpt). */
161 if (input_filename
== NULL
) {
162 result
= cmd_run( PS_COMMAND
, &chld_out
, &chld_err
, 0);
163 if (chld_err
.lines
> 0) {
164 printf ("%s: %s", _("System call sent warnings to stderr"), chld_err
.line
[0]);
168 result
= cmd_file_read( input_filename
, &chld_out
, 0);
171 /* flush first line: j starts at 1 */
172 for (j
= 1; j
< chld_out
.lines
; j
++) {
173 input_line
= chld_out
.line
[j
];
176 printf ("%s", input_line
);
/* Reset the per-line program name and argument string before parsing.
 * NOTE(review): each asprintf allocates a fresh procargs; no free of the
 * previous one is visible. */
178 strcpy (procprog
, "");
179 asprintf (&procargs
, "%s", "");
181 cols
= sscanf (input_line
, PS_FORMAT
, PS_VARLIST
);
183 /* Zombie processes do not give a procprog command */
184 if ( cols
< expected_cols
&& strstr(procstat
, zombie
) ) {
185 cols
= expected_cols
;
187 if ( cols
>= expected_cols
) {
/* The argument string is whatever follows offset `pos` in the line. */
189 asprintf (&procargs
, "%s", input_line
+ pos
);
192 /* Some ps return full pathname for command. This removes path */
/* NOTE(review): if base_name returns a pointer into procprog, source and
 * destination overlap, which strcpy does not permit -- confirm. */
193 strcpy(procprog
, base_name(procprog
));
195 /* we need to convert the elapsed time to seconds */
196 procseconds
= convert_to_seconds(procetime
);
199 printf ("proc#=%d uid=%d vsz=%d rss=%d pid=%d ppid=%d pcpu=%.2f stat=%s etime=%s prog=%s args=%s\n",
200 procs
, procuid
, procvsz
, procrss
,
201 procpid
, procppid
, procpcpu
, procstat
,
202 procetime
, procprog
, procargs
);
/* Skip lines whose pid equals mypid (presumably this plugin's own PID;
 * its assignment is not visible here). */
205 if (mypid
== procpid
) continue;
/* One test per filter. The EREG_ARGS case visibly ORs its bit into
 * resultsum; the other cases presumably do the same (lines not visible).
 * VSZ, RSS and PCPU filters match when the process value is >= the limit. */
207 if ((options
& STAT
) && (strstr (statopts
, procstat
)))
209 if ((options
& ARGS
) && procargs
&& (strstr (procargs
, args
) != NULL
))
211 if ((options
& EREG_ARGS
) && procargs
&& (regexec(&re_args
, procargs
, (size_t) 0, NULL
, 0) == 0))
212 resultsum
|= EREG_ARGS
;
213 if ((options
& PROG
) && procprog
&& (strcmp (prog
, procprog
) == 0))
215 if ((options
& PPID
) && (procppid
== ppid
))
217 if ((options
& USER
) && (procuid
== uid
))
219 if ((options
& VSZ
) && (procvsz
>= vsz
))
221 if ((options
& RSS
) && (procrss
>= rss
))
223 if ((options
& PCPU
) && (procpcpu
>= pcpu
))
228 /* Next line if filters not matched */
229 if (!(options
== resultsum
|| options
== ALL
))
234 printf ("Matched: uid=%d vsz=%d rss=%d pid=%d ppid=%d pcpu=%.2f stat=%s etime=%s prog=%s args=%s\n",
235 procuid
, procvsz
, procrss
,
236 procpid
, procppid
, procpcpu
, procstat
,
237 procetime
, procprog
, procargs
);
/* Per-process metrics are checked here, one process at a time; the PROCS
 * metric is checked once after the loop using the final count. */
240 if (metric
== METRIC_VSZ
)
241 i
= check_thresholds (procvsz
);
242 else if (metric
== METRIC_RSS
)
243 i
= check_thresholds (procrss
);
244 /* TODO? float thresholds for --metric=CPU */
245 else if (metric
== METRIC_CPU
)
246 i
= check_thresholds ((int)procpcpu
);
247 else if (metric
== METRIC_ELAPSED
)
248 i
= check_thresholds (procseconds
);
/* For non-PROCS metrics, append the offending program name to the
 * comma-separated `fails` list and raise the overall result.
 * NOTE(review): asprintf overwrites `fails` with a new buffer while the
 * old one is still an argument; no free of the old buffer is visible. */
250 if (metric
!= METRIC_PROCS
) {
251 if (i
== STATE_WARNING
) {
253 asprintf (&fails
, "%s%s%s", fails
, (strcmp(fails
,"") ? ", " : ""), procprog
);
254 result
= max_state (result
, i
);
256 if (i
== STATE_CRITICAL
) {
258 asprintf (&fails
, "%s%s%s", fails
, (strcmp(fails
,"") ? ", " : ""), procprog
);
259 result
= max_state (result
, i
);
263 /* This should not happen */
/* NOTE(review): this prints input_buffer, which no visible line writes to
 * after its malloc; the line being parsed is input_line -- confirm which
 * was intended. */
265 printf(_("Not parseable: %s"), input_buffer
);
269 if (found
== 0) { /* no process lines parsed so return STATE_UNKNOWN */
270 printf (_("Unable to read output\n"));
271 return STATE_UNKNOWN
;
274 if ( result
== STATE_UNKNOWN
)
277 /* Needed if procs found, but none match filter */
278 if ( metric
== METRIC_PROCS
) {
279 result
= max_state (result
, check_thresholds (procs
) );
/* Status line: "<metric> <STATE>: ", then warn/crit tallies for
 * per-process metrics, the matched process count, the filter summary in
 * `fmt`, and (when verbose >= 1) the list of failing program names. */
282 if ( result
== STATE_OK
) {
283 printf ("%s %s: ", metric_name
, _("OK"));
284 } else if (result
== STATE_WARNING
) {
285 printf ("%s %s: ", metric_name
, _("WARNING"));
286 if ( metric
!= METRIC_PROCS
) {
287 printf (_("%d warn out of "), warn
);
289 } else if (result
== STATE_CRITICAL
) {
290 printf ("%s %s: ", metric_name
, _("CRITICAL"));
291 if (metric
!= METRIC_PROCS
) {
292 printf (_("%d crit, %d warn out of "), crit
, warn
);
295 printf (ngettext ("%d process", "%d processes", (unsigned long) procs
), procs
);
297 if (strcmp(fmt
,"") != 0) {
298 printf (_(" with %s"), fmt
);
301 if ( verbose
>= 1 && strcmp(fails
,"") )
302 printf (" [%s]", fails
);
310 /* process command-line arguments */
/*
 * Parse the command line with getopt_long into the file-scope threshold,
 * filter and metric variables.
 *
 * Each filter option appends a human-readable clause to `fmt`, prefixed with
 * ", " when `options` is already non-zero. After option parsing, leftover
 * positional arguments fill wmax, cmax and statopts if those are still
 * unset. Returns the result of validate_arguments().
 */
312 process_arguments (int argc
, char **argv
)
319 int cflags
= REG_NOSUB
| REG_EXTENDED
;
320 char errbuf
[MAX_INPUT_BUFFER
];
/* Long options without a short form use values above CHAR_MAX so they
 * cannot collide with any option character. */
321 static struct option longopts
[] = {
322 {"warning", required_argument
, 0, 'w'},
323 {"critical", required_argument
, 0, 'c'},
324 {"metric", required_argument
, 0, 'm'},
325 {"timeout", required_argument
, 0, 't'},
326 {"status", required_argument
, 0, 's'},
327 {"ppid", required_argument
, 0, 'p'},
328 {"command", required_argument
, 0, 'C'},
329 {"vsz", required_argument
, 0, 'z'},
330 {"rss", required_argument
, 0, 'r'},
331 {"pcpu", required_argument
, 0, 'P'},
332 {"elapsed", required_argument
, 0, 'e'},
333 {"argument-array", required_argument
, 0, 'a'},
334 {"help", no_argument
, 0, 'h'},
335 {"version", no_argument
, 0, 'V'},
336 {"verbose", no_argument
, 0, 'v'},
337 {"ereg-argument-array", required_argument
, 0, CHAR_MAX
+1},
338 {"input-file", required_argument
, 0, CHAR_MAX
+2},
/* Rewrite any argument spelled exactly "-to" to "-t" in place, before
 * getopt_long processes argv. */
342 for (c
= 1; c
< argc
; c
++)
343 if (strcmp ("-to", argv
[c
]) == 0)
344 strcpy (argv
[c
], "-t");
/* NOTE(review): longopts maps "elapsed" to 'e', but this short option
 * string contains no "e:"; no `case 'e'` is visible in this excerpt. */
347 c
= getopt_long (argc
, argv
, "Vvht:c:w:p:s:u:C:a:z:r:m:P:",
350 if (c
== -1 || c
== EOF
)
359 case 'V': /* version */
360 print_revision (progname
, NP_VERSION
);
362 case 't': /* timeout period */
363 if (!is_integer (optarg
))
364 usage2 (_("Timeout interval must be a positive integer"), optarg
);
366 timeout_interval
= atoi (optarg
);
/* Threshold forms tried in order: "N" (max), ":N" (max), "N:M" (min and
 * max), "N:" (min). */
368 case 'c': /* critical threshold */
369 if (is_integer (optarg
))
370 cmax
= atoi (optarg
);
371 else if (sscanf (optarg
, ":%d", &cmax
) == 1)
373 else if (sscanf (optarg
, "%d:%d", &cmin
, &cmax
) == 2)
375 else if (sscanf (optarg
, "%d:", &cmin
) == 1)
378 usage4 (_("Critical Process Count must be an integer!"));
380 case 'w': /* warning threshold */
381 if (is_integer (optarg
))
382 wmax
= atoi (optarg
);
383 else if (sscanf (optarg
, ":%d", &wmax
) == 1)
385 else if (sscanf (optarg
, "%d:%d", &wmin
, &wmax
) == 2)
387 else if (sscanf (optarg
, "%d:", &wmin
) == 1)
390 usage4 (_("Warning Process Count must be an integer!"));
392 case 'p': /* parent process id */
/* sscanf returning exactly 1 means the integer converted and no trailing
 * non-digit text was captured into tmp. The same check is used for the
 * RSS, VSZ and PCPU values below. */
393 if (sscanf (optarg
, "%d%[^0-9]", &ppid
, tmp
) == 1) {
394 asprintf (&fmt
, "%s%sPPID = %d", (fmt
? fmt
: "") , (options
? ", " : ""), ppid
);
398 usage4 (_("Parent Process ID must be an integer!"));
399 case 's': /* status */
404 asprintf (&fmt
, _("%s%sSTATE = %s"), (fmt
? fmt
: ""), (options
? ", " : ""), statopts
);
/* A numeric argument is looked up with getpwuid, anything else with
 * getpwnam; an unknown user ends in usage2. */
407 case 'u': /* user or user id */
408 if (is_integer (optarg
)) {
410 pw
= getpwuid ((uid_t
) uid
);
411 /* check to be sure user exists */
413 usage2 (_("UID was not found"), optarg
);
416 pw
= getpwnam (optarg
);
417 /* check to be sure user exists */
419 usage2 (_("User name was not found"), optarg
);
424 asprintf (&fmt
, "%s%sUID = %d (%s)", (fmt
? fmt
: ""), (options
? ", " : ""),
428 case 'C': /* command */
429 /* TODO: allow this to be passed in with --metric */
434 asprintf (&fmt
, _("%s%scommand name '%s'"), (fmt
? fmt
: ""), (options
? ", " : ""),
438 case 'a': /* args (full path name with args) */
439 /* TODO: allow this to be passed in with --metric */
444 asprintf (&fmt
, "%s%sargs '%s'", (fmt
? fmt
: ""), (options
? ", " : ""), args
);
/* Regex argument filter: compile with REG_NOSUB | REG_EXTENDED; when
 * compilation fails, die with the regerror text as UNKNOWN. */
448 err
= regcomp(&re_args
, optarg
, cflags
);
450 regerror (err
, &re_args
, errbuf
, MAX_INPUT_BUFFER
);
451 die (STATE_UNKNOWN
, "PROCS %s: %s - %s\n", _("UNKNOWN"), _("Could not compile regular expression"), errbuf
);
453 asprintf (&fmt
, "%s%sregex args '%s'", (fmt
? fmt
: ""), (options
? ", " : ""), optarg
);
454 options
|= EREG_ARGS
;
457 if (sscanf (optarg
, "%d%[^0-9]", &rss
, tmp
) == 1) {
458 asprintf (&fmt
, "%s%sRSS >= %d", (fmt
? fmt
: ""), (options
? ", " : ""), rss
);
462 usage4 (_("RSS must be an integer!"));
464 if (sscanf (optarg
, "%d%[^0-9]", &vsz
, tmp
) == 1) {
465 asprintf (&fmt
, "%s%sVSZ >= %d", (fmt
? fmt
: ""), (options
? ", " : ""), vsz
);
469 usage4 (_("VSZ must be an integer!"));
471 /* TODO: -P 1.5.5 is accepted */
472 if (sscanf (optarg
, "%f%[^0-9.]", &pcpu
, tmp
) == 1) {
473 asprintf (&fmt
, "%s%sPCPU >= %.2f", (fmt
? fmt
: ""), (options
? ", " : ""), pcpu
);
477 usage4 (_("PCPU must be a float!"));
/* Metric selection: the option text becomes metric_name as given, then is
 * matched case-sensitively against the known metric names. */
479 asprintf (&metric_name
, "%s", optarg
);
480 if ( strcmp(optarg
, "PROCS") == 0) {
481 metric
= METRIC_PROCS
;
484 else if ( strcmp(optarg
, "VSZ") == 0) {
488 else if ( strcmp(optarg
, "RSS") == 0 ) {
492 else if ( strcmp(optarg
, "CPU") == 0 ) {
496 else if ( strcmp(optarg
, "ELAPSED") == 0) {
497 metric
= METRIC_ELAPSED
;
501 usage4 (_("Metric must be one of PROCS, VSZ, RSS, CPU, ELAPSED!"));
502 case 'v': /* verbose */
506 input_filename
= optarg
;
/* Positional fallbacks: remaining argv entries fill wmax, then cmax, then
 * statopts, each only when still unset. atoi performs no validation of
 * these values. */
512 if (wmax
== -1 && argv
[c
])
513 wmax
= atoi (argv
[c
++]);
514 if (cmax
== -1 && argv
[c
])
515 cmax
= atoi (argv
[c
++]);
516 if (statopts
== NULL
&& argv
[c
]) {
517 asprintf (&statopts
, "%s", argv
[c
++]);
518 asprintf (&fmt
, _("%s%sSTATE = %s"), (fmt
? fmt
: ""), (options
? ", " : ""), statopts
);
522 return validate_arguments ();
/*
 * Sanity-check the parsed thresholds.
 *
 * Visible checks: for "standard" ranges (max >= min for both warning and
 * critical) the warning maximum may not exceed the critical maximum, and the
 * critical minimum may not exceed the warning minimum; each violation prints
 * a message. A value of -1 appears to mean "not set" (it is excluded from
 * the comparisons). The return statements are not visible in this excerpt.
 */
528 validate_arguments ()
/* A max was given without a min; the statement that follows each test is
 * not visible here (presumably it defaults the min -- confirm). */
531 if (wmax
>= 0 && wmin
== -1)
533 if (cmax
>= 0 && cmin
== -1)
535 if (wmax
>= wmin
&& cmax
>= cmin
) { /* standard ranges */
536 if (wmax
> cmax
&& cmax
!= -1) {
537 printf (_("wmax (%d) cannot be greater than cmax (%d)\n"), wmax
, cmax
);
540 if (cmin
> wmin
&& wmin
!= -1) {
541 printf (_("wmin (%d) cannot be less than cmin (%d)\n"), wmin
, cmin
);
546 /* if (wmax == -1 && cmax == -1 && wmin == -1 && cmin == -1) { */
547 /* printf ("At least one threshold must be set\n"); */
/* statopts is set to an empty string here; the guarding condition is not
 * visible (presumably only when no state filter was given -- confirm). */
555 statopts
= strdup("");
574 /* Check thresholds against value */
/*
 * Compare `value` with the critical and warning thresholds held in the
 * file-scope variables cmin/cmax and wmin/wmax.
 *
 * The critical range is tested first, then the warning range. For each:
 *   - both bounds set (>= 0) and max < min: the range is inverted, and the
 *     state is raised when max < value < min;
 *   - otherwise the state is raised when value > max (max set) or
 *     value < min (min set).
 *
 * Returns STATE_CRITICAL or STATE_WARNING from the visible branches. The
 * results for "no threshold set" (all four equal to -1) and for "no
 * violation" are not visible in this excerpt.
 */
576 check_thresholds (int value
)
578 if (wmax
== -1 && cmax
== -1 && wmin
== -1 && cmin
== -1) {
/* Inverted critical range: only values strictly between max and min. */
581 else if (cmax
>= 0 && cmin
>= 0 && cmax
< cmin
) {
582 if (value
> cmax
&& value
< cmin
)
583 return STATE_CRITICAL
;
585 else if (cmax
>= 0 && value
> cmax
) {
586 return STATE_CRITICAL
;
588 else if (cmin
>= 0 && value
< cmin
) {
589 return STATE_CRITICAL
;
/* Same tests for the warning range, reached when the critical tests above
 * did not return. */
592 if (wmax
>= 0 && wmin
>= 0 && wmax
< wmin
) {
593 if (value
> wmax
&& value
< wmin
) {
594 return STATE_WARNING
;
597 else if (wmax
>= 0 && value
> wmax
) {
598 return STATE_WARNING
;
600 else if (wmin
>= 0 && value
< wmin
) {
601 return STATE_WARNING
;
607 /* convert the elapsed time to seconds */
/*
 * Convert an elapsed-time string to a number of seconds.
 *
 * Formats handled by the visible sscanf calls: "D-H:M:S", "H:M:S", and a
 * two-field "x:y" form whose destination variables are not visible here
 * (presumably minutes and seconds). The visible part of the sum multiplies
 * days by 86400. When verbose >= 3 and the metric is ELAPSED, the total is
 * printed. The return statement is not visible in this excerpt.
 * NOTE(review): the sscanf return values are not checked in the visible
 * lines.
 */
609 convert_to_seconds(char *etime
) {
/* Scan the string once; the loop body is not visible, but coloncnt is
 * tested below, so it presumably counts the separators -- confirm. */
628 for (ptr
= etime
; *ptr
!= '\0'; ptr
++) {
641 sscanf(etime
, "%d-%d:%d:%d",
642 &days
, &hours
, &minutes
, &seconds
);
643 /* linux 2.6.5/2.6.6 reporting some processes with infinite
644 * elapsed times for some reason */
650 sscanf(etime
, "%d:%d:%d",
651 &hours
, &minutes
, &seconds
);
652 } else if (coloncnt
== 1) {
653 sscanf(etime
, "%d:%d",
658 total
= (days
* 86400) +
663 if (verbose
>= 3 && metric
== METRIC_ELAPSED
) {
664 printf("seconds: %d\n", total
);
673 print_revision (progname
, NP_VERSION
);
675 printf ("Copyright (c) 1999 Ethan Galstad <nagios@nagios.org>\n");
676 printf (COPYRIGHT
, copyright
, email
);
678 printf ("%s\n", _("Checks all processes and generates WARNING or CRITICAL states if the specified"));
679 printf ("%s\n", _("metric is outside the required threshold ranges. The metric defaults to number"));
680 printf ("%s\n", _("of processes. Search filters can be applied to limit the processes to check."));
686 printf (_(UT_HELP_VRSN
));
687 printf (_(UT_EXTRA_OPTS
));
688 printf (" %s\n", "-w, --warning=RANGE");
689 printf (" %s\n", _("Generate warning state if metric is outside this range"));
690 printf (" %s\n", "-c, --critical=RANGE");
691 printf (" %s\n", _("Generate critical state if metric is outside this range"));
692 printf (" %s\n", "-m, --metric=TYPE");
693 printf (" %s\n", _("Check thresholds against metric. Valid types:"));
694 printf (" %s\n", _("PROCS - number of processes (default)"));
695 printf (" %s\n", _("VSZ - virtual memory size"));
696 printf (" %s\n", _("RSS - resident set memory size"));
697 printf (" %s\n", _("CPU - percentage CPU"));
698 /* only linux etime is supported currently */
699 #if defined( __linux__ )
700 printf (" %s\n", _("ELAPSED - time elapsed in seconds"));
701 #endif /* defined(__linux__) */
702 printf (_(UT_TIMEOUT
), DEFAULT_SOCKET_TIMEOUT
);
704 printf (" %s\n", "-v, --verbose");
705 printf (" %s\n", _("Extra information. Up to 3 verbosity levels"));
708 printf ("%s\n", "Filters:");
709 printf (" %s\n", "-s, --state=STATUSFLAGS");
710 printf (" %s\n", _("Only scan for processes that have, in the output of `ps`, one or"));
711 printf (" %s\n", _("more of the status flags you specify (for example R, Z, S, RS,"));
712 printf (" %s\n", _("RSZDT, plus others based on the output of your 'ps' command)."));
713 printf (" %s\n", "-p, --ppid=PPID");
714 printf (" %s\n", _("Only scan for children of the parent process ID indicated."));
715 printf (" %s\n", "-z, --vsz=VSZ");
716 printf (" %s\n", _("Only scan for processes with VSZ higher than indicated."));
717 printf (" %s\n", "-r, --rss=RSS");
718 printf (" %s\n", _("Only scan for processes with RSS higher than indicated."));
719 printf (" %s\n", "-P, --pcpu=PCPU");
720 printf (" %s\n", _("Only scan for processes with PCPU higher than indicated."));
721 printf (" %s\n", "-u, --user=USER");
722 printf (" %s\n", _("Only scan for processes with user name or ID indicated."));
723 printf (" %s\n", "-a, --argument-array=STRING");
724 printf (" %s\n", _("Only scan for processes with args that contain STRING."));
725 printf (" %s\n", "--ereg-argument-array=STRING");
726 printf (" %s\n", _("Only scan for processes with args that contain the regex STRING."));
727 printf (" %s\n", "-C, --command=COMMAND");
728 printf (" %s\n", _("Only scan for exact matches of COMMAND (without path)."));
731 RANGEs are specified 'min:max' or 'min:' or ':max' (or 'max'). If\n\
732 specified 'max:min', a warning status will be generated if the\n\
733 count is inside the specified range\n\n"));
736 This plugin checks the number of currently running processes and\n\
737 generates WARNING or CRITICAL states if the process count is outside\n\
738 the specified threshold ranges. The process count can be filtered by\n\
739 process owner, parent process PID, current state (e.g., 'Z'), or may\n\
740 be the total number of running processes\n\n"));
743 printf ("%s\n", _("Notes:"));
744 printf (_(UT_EXTRA_OPTS_NOTES
));
748 printf ("%s\n", _("Examples:"));
749 printf (" %s\n", "check_procs -w 2:2 -c 2:1024 -C portsentry");
750 printf (" %s\n", _("Warning if not two processes with command name portsentry."));
751 printf (" %s\n\n", _("Critical if < 2 or > 1024 processes"));
752 printf (" %s\n", "check_procs -w 10 -a '/usr/local/bin/perl' -u root");
753 printf (" %s\n", _("Warning alert if > 10 processes with command arguments containing"));
754 printf (" %s\n\n", _("'/usr/local/bin/perl' and owned by root"));
755 printf (" %s\n", "check_procs -w 50000 -c 100000 --metric=VSZ");
756 printf (" %s\n\n", _("Alert if VSZ of any processes over 50K or 100K"));
757 printf (" %s\n", "check_procs -w 10 -c 20 --metric=CPU");
758 printf (" %s\n", _("Alert if CPU of any processes over 10%% or 20%%"));
760 printf (_(UT_SUPPORT
));
766 printf (_("Usage: "));
767 printf ("%s -w <range> -c <range> [-m metric] [-s state] [-p ppid]\n", progname
);
768 printf (" [-u user] [-r rss] [-z vsz] [-P %%cpu] [-a argument-array]\n");
769 printf (" [-C command] [-t timeout] [-v]\n");