1 /*****************************************************************************
3 * Nagios check_procs plugin
6 * Copyright (c) 2000-2008 Nagios Plugins Development Team
8 * Last Modified: $Date$
12 * This file contains the check_procs plugin
14 * Checks all processes and generates WARNING or CRITICAL states if the
15 * specified metric is outside the required threshold ranges. The metric
16 * defaults to number of processes. Search filters can be applied to limit
17 * the processes to check.
20 * This program is free software: you can redistribute it and/or modify
21 * it under the terms of the GNU General Public License as published by
22 * the Free Software Foundation, either version 3 of the License, or
23 * (at your option) any later version.
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
30 * You should have received a copy of the GNU General Public License
31 * along with this program. If not, see <http://www.gnu.org/licenses/>.
35 *****************************************************************************/
/* Identification strings for this plugin. progname is passed to
 * np_extra_opts and print_revision and is printed in the usage line. */
37 const char *progname
= "check_procs";
38 const char *program_name
= "check_procs"; /* Required for coreutils libs */
39 const char *revision
= "$Revision$";
40 const char *copyright
= "2000-2008";
41 const char *email
= "nagiosplug-devel@lists.sourceforge.net";
/* Prototypes for the functions defined later in this file. */
50 int process_arguments (int, char **);
51 int validate_arguments (void);
52 int check_thresholds (int);
53 int convert_to_seconds (char *);
54 void print_help (void);
55 void print_usage (void);
/* Filter options OR their flag into this mask (visible for EREG_ARGS in
 * process_arguments); main compares it with the per-process resultsum. */
62 int options
= 0; /* bitmask of filter criteria to test against */
/* Flag bit for the --ereg-argument-array regex filter, used in both
 * options and resultsum. */
73 #define EREG_ARGS 1024
74 /* Different metrics */
/* Metric the thresholds are applied to; defaults to the process count and
 * is replaced when a metric name is parsed in process_arguments. */
83 enum metric metric
= METRIC_PROCS
;
/* When non-NULL, main reads ps output from this file with fopen instead
 * of running PS_COMMAND. */
94 char *input_filename
= NULL
;
/* Scratch buffer that receives trailing non-numeric text in the sscanf
 * checks of process_arguments. */
98 char tmp
[MAX_INPUT_BUFFER
];
/* Stream the ps output is read from (spopen pipe or fopen file). */
100 FILE *ps_input
= NULL
;
/*
 * Plugin entry point.
 *
 * Parses the command line, reads `ps` output either from PS_COMMAND (via
 * spopen) or from input_filename, scans each line with PS_FORMAT, applies
 * the filters selected in `options`, and checks the chosen metric against
 * the thresholds. Finally prints a one-line status whose prefix depends
 * on the resulting state.
 *
 * NOTE(review): the embedded original line numbers in this listing have
 * gaps, so some statements (including the bodies of several if-statements
 * and closing braces) are not visible here. The comments below describe
 * only what the visible lines show.
 */
104 main (int argc
, char **argv
)
119 char procetime
[MAX_INPUT_BUFFER
] = { '\0' };
122 const char *zombie
= "Z";
124 int resultsum
= 0; /* bitmask of the filter criteria met by a process */
125 int found
= 0; /* counter for number of lines returned in `ps` output */
126 int procs
= 0; /* counter for number of processes meeting filter criteria */
127 int pos
; /* number of spaces before 'args' in `ps` output */
128 int cols
; /* number of columns in ps output */
129 int expected_cols
= PS_COLS
- 1;
130 int warn
= 0; /* number of processes in warn state */
131 int crit
= 0; /* number of processes in crit state */
133 int result
= STATE_UNKNOWN
;
/* Locale and message-catalog setup; LC_NUMERIC is then forced to POSIX. */
135 setlocale (LC_ALL
, "");
136 bindtextdomain (PACKAGE
, LOCALEDIR
);
137 textdomain (PACKAGE
);
138 setlocale(LC_NUMERIC
, "POSIX");
/* Heap buffers: input_buffer holds one fgets chunk, procprog the parsed
 * command name.
 * NOTE(review): no check of the malloc results is visible in this listing. */
140 input_buffer
= malloc (MAX_INPUT_BUFFER
);
141 procprog
= malloc (MAX_INPUT_BUFFER
);
/* Default metric is the process count; process_arguments may replace both
 * the name and the enum value. */
143 asprintf (&metric_name
, "PROCS");
144 metric
= METRIC_PROCS
;
146 /* Parse extra opts if any */
147 argv
=np_extra_opts (&argc
, argv
, progname
);
149 if (process_arguments (argc
, argv
) == ERROR
)
150 usage4 (_("Could not parse arguments"));
155 /* Set signal handling and alarm timeout */
156 if (signal (SIGALRM
, popen_timeout_alarm_handler
) == SIG_ERR
) {
157 usage4 (_("Cannot catch SIGALRM"));
159 alarm (timeout_interval
);
162 printf (_("CMD: %s\n"), PS_COMMAND
);
/* No input file given: run PS_COMMAND through a pipe and also open the
 * stderr stream associated with that pipe. */
164 if (input_filename
== NULL
) {
165 ps_input
= spopen (PS_COMMAND
);
166 if (ps_input
== NULL
) {
167 printf (_("Could not open pipe: %s\n"), PS_COMMAND
);
168 return STATE_UNKNOWN
;
170 child_stderr
= fdopen (child_stderr_array
[fileno (ps_input
)], "r");
171 if (child_stderr
== NULL
)
172 printf (_("Could not open stderr for %s\n"), PS_COMMAND
);
/* Alternative path: read previously captured ps output from
 * input_filename; failure to open it ends the plugin through die(). */
174 ps_input
= fopen(input_filename
, "r");
175 if (ps_input
== NULL
) {
176 die( STATE_UNKNOWN
, _("Error opening %s\n"), input_filename
);
180 /* flush first line */
/* NOTE(review): the fgets return value is not checked here. If the stream
 * is empty or ends without a newline, input_buffer is not refreshed and the
 * loop condition below may never become false; an empty string would also
 * index input_buffer[-1]. */
181 fgets (input_buffer
, MAX_INPUT_BUFFER
- 1, ps_input
);
182 while ( input_buffer
[strlen(input_buffer
)-1] != '\n' )
183 fgets (input_buffer
, MAX_INPUT_BUFFER
- 1, ps_input
);
/* Main loop: one iteration per line of ps output. */
185 while (fgets (input_buffer
, MAX_INPUT_BUFFER
- 1, ps_input
)) {
186 asprintf (&input_line
, "%s", input_buffer
);
/* Lines longer than the buffer are reassembled: keep reading chunks and
 * appending them to input_line until a chunk ends with a newline.
 * NOTE(review): each asprintf stores a new allocation in input_line and no
 * free of the previous one is visible in this listing; the inner fgets is
 * also unchecked. */
187 while ( input_buffer
[strlen(input_buffer
)-1] != '\n' ) {
188 fgets (input_buffer
, MAX_INPUT_BUFFER
- 1, ps_input
);
189 asprintf (&input_line
, "%s%s", input_line
, input_buffer
);
193 printf ("%s", input_line
);
/* Reset the per-line parse targets before scanning. */
195 strcpy (procprog
, "");
196 asprintf (&procargs
, "%s", "");
/* cols receives the number of fields sscanf converted. */
198 cols
= sscanf (input_line
, PS_FORMAT
, PS_VARLIST
);
200 /* Zombie processes do not give a procprog command */
201 if ( cols
< expected_cols
&& strstr(procstat
, zombie
) ) {
202 cols
= expected_cols
;
204 if ( cols
>= expected_cols
) {
/* The argument string starts at offset pos within the line. */
206 asprintf (&procargs
, "%s", input_line
+ pos
);
209 /* Some ps return full pathname for command. This removes path */
/* NOTE(review): if base_name returns a pointer into procprog, source and
 * destination of this strcpy overlap - confirm base_name semantics. */
210 strcpy(procprog
, base_name(procprog
));
212 /* we need to convert the elapsed time to seconds */
213 procseconds
= convert_to_seconds(procetime
);
216 printf ("proc#=%d uid=%d vsz=%d rss=%d pid=%d ppid=%d pcpu=%.2f stat=%s etime=%s prog=%s args=%s\n",
217 procs
, procuid
, procvsz
, procrss
,
218 procpid
, procppid
, procpcpu
, procstat
,
219 procetime
, procprog
, procargs
);
/* Skip the line whose pid equals mypid (presumably this plugin's own
 * process; the assignment of mypid is not visible here). */
222 if (mypid
== procpid
) continue;
/* Filter tests. Each test applies only when its flag is set in options.
 * Only the EREG_ARGS test shows its body in this listing: a match ORs the
 * flag into resultsum. The numeric filters compare with >=. */
224 if ((options
& STAT
) && (strstr (statopts
, procstat
)))
226 if ((options
& ARGS
) && procargs
&& (strstr (procargs
, args
) != NULL
))
228 if ((options
& EREG_ARGS
) && procargs
&& (regexec(&re_args
, procargs
, (size_t) 0, NULL
, 0) == 0))
229 resultsum
|= EREG_ARGS
;
230 if ((options
& PROG
) && procprog
&& (strcmp (prog
, procprog
) == 0))
232 if ((options
& PPID
) && (procppid
== ppid
))
234 if ((options
& USER
) && (procuid
== uid
))
236 if ((options
& VSZ
) && (procvsz
>= vsz
))
238 if ((options
& RSS
) && (procrss
>= rss
))
240 if ((options
& PCPU
) && (procpcpu
>= pcpu
))
245 /* Next line if filters not matched */
246 if (!(options
== resultsum
|| options
== ALL
))
251 printf ("Matched: uid=%d vsz=%d rss=%d pid=%d ppid=%d pcpu=%.2f stat=%s etime=%s prog=%s args=%s\n",
252 procuid
, procvsz
, procrss
,
253 procpid
, procppid
, procpcpu
, procstat
,
254 procetime
, procprog
, procargs
);
/* Per-process metrics are checked against the thresholds for every
 * matching process; the CPU percentage is truncated to int first. The
 * process-count metric is checked once, after the loop. */
257 if (metric
== METRIC_VSZ
)
258 i
= check_thresholds (procvsz
);
259 else if (metric
== METRIC_RSS
)
260 i
= check_thresholds (procrss
);
261 /* TODO? float thresholds for --metric=CPU */
262 else if (metric
== METRIC_CPU
)
263 i
= check_thresholds ((int)procpcpu
);
264 else if (metric
== METRIC_ELAPSED
)
265 i
= check_thresholds (procseconds
);
/* For per-process metrics, a WARNING or CRITICAL result appends the
 * program name to the comma-separated fails list and raises result via
 * max_state. */
267 if (metric
!= METRIC_PROCS
) {
268 if (i
== STATE_WARNING
) {
270 asprintf (&fails
, "%s%s%s", fails
, (strcmp(fails
,"") ? ", " : ""), procprog
);
271 result
= max_state (result
, i
);
273 if (i
== STATE_CRITICAL
) {
275 asprintf (&fails
, "%s%s%s", fails
, (strcmp(fails
,"") ? ", " : ""), procprog
);
276 result
= max_state (result
, i
);
280 /* This should not happen */
282 printf(_("Not parseable: %s"), input_buffer
);
286 /* If we get anything on STDERR, at least set warning */
287 if (input_filename
== NULL
) {
288 while (fgets (input_buffer
, MAX_INPUT_BUFFER
- 1, child_stderr
)) {
290 printf ("STDERR: %s", input_buffer
);
291 result
= max_state (result
, STATE_WARNING
);
292 printf (_("System call sent warnings to stderr\n"));
295 (void) fclose (child_stderr
);
/* A nonzero spclose result is reported and raises the state to at least
 * WARNING. */
298 if (spclose (ps_input
)) {
299 printf (_("System call returned nonzero status\n"));
300 result
= max_state (result
, STATE_WARNING
);
304 if (found
== 0) { /* no process lines parsed so return STATE_UNKNOWN */
305 printf (_("Unable to read output\n"));
309 if ( result
== STATE_UNKNOWN
)
312 /* Needed if procs found, but none match filter */
313 if ( metric
== METRIC_PROCS
) {
314 result
= max_state (result
, check_thresholds (procs
) );
/* Status line: metric name and state, the warn/crit counts for
 * per-process metrics, the number of matching processes, the filter
 * description in fmt, and - when verbose - the names collected in fails. */
317 if ( result
== STATE_OK
) {
318 printf ("%s %s: ", metric_name
, _("OK"));
319 } else if (result
== STATE_WARNING
) {
320 printf ("%s %s: ", metric_name
, _("WARNING"));
321 if ( metric
!= METRIC_PROCS
) {
322 printf (_("%d warn out of "), warn
);
324 } else if (result
== STATE_CRITICAL
) {
325 printf ("%s %s: ", metric_name
, _("CRITICAL"));
326 if (metric
!= METRIC_PROCS
) {
327 printf (_("%d crit, %d warn out of "), crit
, warn
);
330 printf (ngettext ("%d process", "%d processes", (unsigned long) procs
), procs
);
332 if (strcmp(fmt
,"") != 0) {
333 printf (_(" with %s"), fmt
);
336 if ( verbose
>= 1 && strcmp(fails
,"") )
337 printf (" [%s]", fails
);
345 /* process command-line arguments */
/*
 * Parses argv with getopt_long. Threshold options fill wmin/wmax and
 * cmin/cmax; filter options append a human-readable description to fmt
 * (and, as visible for the regex filter, set a flag bit in options).
 * Remaining positional arguments are then accepted as warning maximum,
 * critical maximum and state flags. Returns the result of
 * validate_arguments().
 *
 * NOTE(review): the embedded original line numbers in this listing have
 * gaps; several case labels, assignments and break statements are not
 * visible, so the comments below cover only the visible lines.
 */
347 process_arguments (int argc
, char **argv
)
354 int cflags
= REG_NOSUB
| REG_EXTENDED
;
355 char errbuf
[MAX_INPUT_BUFFER
];
/* NOTE(review): this table registers the long name "status" for 's',
 * while the help text in this file advertises --state. It also maps
 * "elapsed" to 'e', but the short-option string passed to getopt_long
 * below contains no 'e'. */
356 static struct option longopts
[] = {
357 {"warning", required_argument
, 0, 'w'},
358 {"critical", required_argument
, 0, 'c'},
359 {"metric", required_argument
, 0, 'm'},
360 {"timeout", required_argument
, 0, 't'},
361 {"status", required_argument
, 0, 's'},
362 {"ppid", required_argument
, 0, 'p'},
363 {"command", required_argument
, 0, 'C'},
364 {"vsz", required_argument
, 0, 'z'},
365 {"rss", required_argument
, 0, 'r'},
366 {"pcpu", required_argument
, 0, 'P'},
367 {"elapsed", required_argument
, 0, 'e'},
368 {"argument-array", required_argument
, 0, 'a'},
369 {"help", no_argument
, 0, 'h'},
370 {"version", no_argument
, 0, 'V'},
371 {"verbose", no_argument
, 0, 'v'},
372 {"ereg-argument-array", required_argument
, 0, CHAR_MAX
+1},
373 {"input-file", required_argument
, 0, CHAR_MAX
+2},
/* Rewrite the legacy "-to" spelling to "-t" in place before option
 * parsing. */
377 for (c
= 1; c
< argc
; c
++)
378 if (strcmp ("-to", argv
[c
]) == 0)
379 strcpy (argv
[c
], "-t");
382 c
= getopt_long (argc
, argv
, "Vvht:c:w:p:s:u:C:a:z:r:m:P:",
385 if (c
== -1 || c
== EOF
)
394 case 'V': /* version */
395 print_revision (progname
, revision
);
397 case 't': /* timeout period */
398 if (!is_integer (optarg
))
399 usage2 (_("Timeout interval must be a positive integer"), optarg
);
401 timeout_interval
= atoi (optarg
);
/* Threshold syntax, tried in this order: a plain integer sets the
 * maximum; ":N" scans into the maximum; "A:B" scans into minimum and
 * maximum; "A:" scans into the minimum. Anything else is a usage error. */
403 case 'c': /* critical threshold */
404 if (is_integer (optarg
))
405 cmax
= atoi (optarg
);
406 else if (sscanf (optarg
, ":%d", &cmax
) == 1)
408 else if (sscanf (optarg
, "%d:%d", &cmin
, &cmax
) == 2)
410 else if (sscanf (optarg
, "%d:", &cmin
) == 1)
413 usage4 (_("Critical Process Count must be an integer!"));
415 case 'w': /* warning threshold */
416 if (is_integer (optarg
))
417 wmax
= atoi (optarg
);
418 else if (sscanf (optarg
, ":%d", &wmax
) == 1)
420 else if (sscanf (optarg
, "%d:%d", &wmin
, &wmax
) == 2)
422 else if (sscanf (optarg
, "%d:", &wmin
) == 1)
425 usage4 (_("Warning Process Count must be an integer!"));
/* The "%d%[^0-9]" pattern with a required result of exactly 1 accepts the
 * value only when no non-digit text follows the number: trailing
 * non-digits would be stored in tmp and make sscanf return 2. The same
 * idiom is used for the RSS, VSZ and PCPU filters below. */
427 case 'p': /* process id */
428 if (sscanf (optarg
, "%d%[^0-9]", &ppid
, tmp
) == 1) {
429 asprintf (&fmt
, "%s%sPPID = %d", (fmt
? fmt
: "") , (options
? ", " : ""), ppid
);
433 usage4 (_("Parent Process ID must be an integer!"));
434 case 's': /* status */
439 asprintf (&fmt
, _("%s%sSTATE = %s"), (fmt
? fmt
: ""), (options
? ", " : ""), statopts
);
/* A numeric argument is looked up with getpwuid, anything else with
 * getpwnam; both paths report a usage error when the lookup fails. */
442 case 'u': /* user or user id */
443 if (is_integer (optarg
)) {
445 pw
= getpwuid ((uid_t
) uid
);
446 /* check to be sure user exists */
448 usage2 (_("UID was not found"), optarg
);
451 pw
= getpwnam (optarg
);
452 /* check to be sure user exists */
454 usage2 (_("User name was not found"), optarg
);
459 asprintf (&fmt
, "%s%sUID = %d (%s)", (fmt
? fmt
: ""), (options
? ", " : ""),
463 case 'C': /* command */
464 /* TODO: allow this to be passed in with --metric */
469 asprintf (&fmt
, _("%s%scommand name '%s'"), (fmt
? fmt
: ""), (options
? ", " : ""),
473 case 'a': /* args (full path name with args) */
474 /* TODO: allow this to be passed in with --metric */
479 asprintf (&fmt
, "%s%sargs '%s'", (fmt
? fmt
: ""), (options
? ", " : ""), args
);
/* Regex filter: the pattern is compiled with cflags (REG_NOSUB |
 * REG_EXTENDED). The regerror/die lines turn a compile error into an
 * UNKNOWN exit; on success the EREG_ARGS flag is added to options. */
483 err
= regcomp(&re_args
, optarg
, cflags
);
485 regerror (err
, &re_args
, errbuf
, MAX_INPUT_BUFFER
);
486 die (STATE_UNKNOWN
, "PROCS %s: %s - %s\n", _("UNKNOWN"), _("Could not compile regular expression"), errbuf
);
488 asprintf (&fmt
, "%s%sregex args '%s'", (fmt
? fmt
: ""), (options
? ", " : ""), optarg
);
489 options
|= EREG_ARGS
;
492 if (sscanf (optarg
, "%d%[^0-9]", &rss
, tmp
) == 1) {
493 asprintf (&fmt
, "%s%sRSS >= %d", (fmt
? fmt
: ""), (options
? ", " : ""), rss
);
497 usage4 (_("RSS must be an integer!"));
499 if (sscanf (optarg
, "%d%[^0-9]", &vsz
, tmp
) == 1) {
500 asprintf (&fmt
, "%s%sVSZ >= %d", (fmt
? fmt
: ""), (options
? ", " : ""), vsz
);
504 usage4 (_("VSZ must be an integer!"));
506 /* TODO: -P 1.5.5 is accepted */
507 if (sscanf (optarg
, "%f%[^0-9.]", &pcpu
, tmp
) == 1) {
508 asprintf (&fmt
, "%s%sPCPU >= %.2f", (fmt
? fmt
: ""), (options
? ", " : ""), pcpu
);
512 usage4 (_("PCPU must be a float!"));
/* The metric name is copied verbatim into metric_name (main prints it as
 * the status-line prefix) and then mapped to the enum by exact,
 * case-sensitive comparison. */
514 asprintf (&metric_name
, "%s", optarg
);
515 if ( strcmp(optarg
, "PROCS") == 0) {
516 metric
= METRIC_PROCS
;
519 else if ( strcmp(optarg
, "VSZ") == 0) {
523 else if ( strcmp(optarg
, "RSS") == 0 ) {
527 else if ( strcmp(optarg
, "CPU") == 0 ) {
531 else if ( strcmp(optarg
, "ELAPSED") == 0) {
532 metric
= METRIC_ELAPSED
;
536 usage4 (_("Metric must be one of PROCS, VSZ, RSS, CPU, ELAPSED!"));
537 case 'v': /* command */
541 input_filename
= optarg
;
/* Positional arguments: warning maximum, critical maximum and state
 * flags, each consumed only when the corresponding value is still unset
 * (-1 or NULL). These atoi calls do no validation of their own.
 * NOTE(review): presumably c was set to the first non-option index before
 * this point; that assignment is not visible here. */
547 if (wmax
== -1 && argv
[c
])
548 wmax
= atoi (argv
[c
++]);
549 if (cmax
== -1 && argv
[c
])
550 cmax
= atoi (argv
[c
++]);
551 if (statopts
== NULL
&& argv
[c
]) {
552 asprintf (&statopts
, "%s", argv
[c
++]);
553 asprintf (&fmt
, _("%s%sSTATE = %s"), (fmt
? fmt
: ""), (options
? ", " : ""), statopts
);
557 return validate_arguments ();
/*
 * Checks the parsed thresholds for consistency. For standard ranges
 * (max >= min for both warning and critical) it prints a message when the
 * warning maximum exceeds a set critical maximum, or when the critical
 * minimum exceeds a set warning minimum. A value of -1 is treated as
 * "not set" in these tests.
 *
 * NOTE(review): the embedded original line numbers have gaps; the bodies
 * of the first two if-statements and the return statements are not visible
 * in this listing.
 */
563 validate_arguments ()
566 if (wmax
>= 0 && wmin
== -1)
568 if (cmax
>= 0 && cmin
== -1)
570 if (wmax
>= wmin
&& cmax
>= cmin
) { /* standard ranges */
571 if (wmax
> cmax
&& cmax
!= -1) {
572 printf (_("wmax (%d) cannot be greater than cmax (%d)\n"), wmax
, cmax
);
575 if (cmin
> wmin
&& wmin
!= -1) {
576 printf (_("wmin (%d) cannot be less than cmin (%d)\n"), wmin
, cmin
);
581 /* if (wmax == -1 && cmax == -1 && wmin == -1 && cmin == -1) { */
582 /* printf ("At least one threshold must be set\n"); */
/* statopts is set to an empty string here; the condition guarding this
 * assignment is not visible in this listing. */
590 statopts
= strdup("");
609 /* Check thresholds against value */
/*
 * Compares value with the critical range (cmin/cmax) first and then with
 * the warning range (wmin/wmax), returning STATE_CRITICAL or STATE_WARNING
 * on the first violation found. Bounds are only tested when they are >= 0.
 * With a normal range the value violates it when it is above the maximum
 * or below the minimum. When both bounds are set and max < min, the range
 * is inverted: the value violates it when it lies strictly between max and
 * min.
 *
 * NOTE(review): the statement executed when all four bounds are -1 and the
 * final return are not visible in this listing.
 */
611 check_thresholds (int value
)
613 if (wmax
== -1 && cmax
== -1 && wmin
== -1 && cmin
== -1) {
/* Critical range; this inverted-range branch is chosen before the plain
 * max/min tests, so those are skipped whenever it applies. */
616 else if (cmax
>= 0 && cmin
>= 0 && cmax
< cmin
) {
617 if (value
> cmax
&& value
< cmin
)
618 return STATE_CRITICAL
;
620 else if (cmax
>= 0 && value
> cmax
) {
621 return STATE_CRITICAL
;
623 else if (cmin
>= 0 && value
< cmin
) {
624 return STATE_CRITICAL
;
/* Warning range, same structure as the critical tests above. */
627 if (wmax
>= 0 && wmin
>= 0 && wmax
< wmin
) {
628 if (value
> wmax
&& value
< wmin
) {
629 return STATE_WARNING
;
632 else if (wmax
>= 0 && value
> wmax
) {
633 return STATE_WARNING
;
635 else if (wmin
>= 0 && value
< wmin
) {
636 return STATE_WARNING
;
642 /* convert the elapsed time to seconds */
/*
 * Parses the elapsed-time string etime with one of three sscanf formats
 * visible below: "%d-%d:%d:%d" (days, hours, minutes, seconds),
 * "%d:%d:%d" (hours, minutes, seconds) or "%d:%d", and combines the
 * fields into total.
 *
 * NOTE(review): the embedded original line numbers have gaps; the body of
 * the scanning loop, the branch conditions other than coloncnt == 1, the
 * rest of the total expression and the return are not visible here. None
 * of the visible sscanf calls has its result checked.
 */
644 convert_to_seconds(char *etime
) {
/* Walks the string one character at a time. The loop body is not visible;
 * presumably it counts the separators that select a format below (see
 * coloncnt). */
663 for (ptr
= etime
; *ptr
!= '\0'; ptr
++) {
676 sscanf(etime
, "%d-%d:%d:%d",
677 &days
, &hours
, &minutes
, &seconds
);
678 /* linux 2.6.5/2.6.6 reporting some processes with infinite
679 * elapsed times for some reason */
685 sscanf(etime
, "%d:%d:%d",
686 &hours
, &minutes
, &seconds
);
687 } else if (coloncnt
== 1) {
688 sscanf(etime
, "%d:%d",
/* 86400 is the number of seconds in one day. */
693 total
= (days
* 86400) +
/* Debug output, only at verbosity 3 or higher with the ELAPSED metric. */
698 if (verbose
>= 3 && metric
== METRIC_ELAPSED
) {
699 printf("seconds: %d\n", total
);
/*
 * Help output: revision and copyright lines, a short description, the
 * option and filter reference, notes and usage examples.
 *
 * NOTE(review): the function header is not visible in this listing;
 * presumably this is the body of print_help. The embedded original line
 * numbers also have gaps, so some printf calls (including the openings of
 * the two multi-line string literals further down) are missing here.
 */
708 print_revision (progname
, revision
);
710 printf ("Copyright (c) 1999 Ethan Galstad <nagios@nagios.org>\n");
711 printf (COPYRIGHT
, copyright
, email
);
713 printf ("%s\n", _("Checks all processes and generates WARNING or CRITICAL states if the specified"));
714 printf ("%s\n", _("metric is outside the required threshold ranges. The metric defaults to number"));
715 printf ("%s\n", _("of processes. Search filters can be applied to limit the processes to check."));
721 printf (_(UT_HELP_VRSN
));
722 printf (_(UT_EXTRA_OPTS
));
723 printf (" %s\n", "-w, --warning=RANGE");
724 printf (" %s\n", _("Generate warning state if metric is outside this range"));
725 printf (" %s\n", "-c, --critical=RANGE");
726 printf (" %s\n", _("Generate critical state if metric is outside this range"));
727 printf (" %s\n", "-m, --metric=TYPE");
728 printf (" %s\n", _("Check thresholds against metric. Valid types:"));
729 printf (" %s\n", _("PROCS - number of processes (default)"));
730 printf (" %s\n", _("VSZ - virtual memory size"));
731 printf (" %s\n", _("RSS - resident set memory size"));
732 printf (" %s\n", _("CPU - percentage cpu"));
733 /* only linux etime is supported currently */
734 #if defined( __linux__ )
735 printf (" %s\n", _("ELAPSED - time elapsed in seconds"));
736 #endif /* defined(__linux__) */
737 printf (_(UT_TIMEOUT
), DEFAULT_SOCKET_TIMEOUT
);
739 printf (" %s\n", "-v, --verbose");
740 printf (" %s\n", _("Extra information. Up to 3 verbosity levels"));
743 printf ("%s\n", "Filters:");
/* NOTE(review): the long name printed here is --state, while the option
 * table in process_arguments registers "status" for 's'. */
744 printf (" %s\n", "-s, --state=STATUSFLAGS");
745 printf (" %s\n", _("Only scan for processes that have, in the output of `ps`, one or"));
746 printf (" %s\n", _("more of the status flags you specify (for example R, Z, S, RS,"));
747 printf (" %s\n", _("RSZDT, plus others based on the output of your 'ps' command)."));
748 printf (" %s\n", "-p, --ppid=PPID");
749 printf (" %s\n", _("Only scan for children of the parent process ID indicated."));
/* NOTE(review): the three texts below say "higher than", but the vsz, rss
 * and pcpu filter tests in main compare with >=, so equal values match as
 * well. */
750 printf (" %s\n", "-z, --vsz=VSZ");
751 printf (" %s\n", _("Only scan for processes with vsz higher than indicated."));
752 printf (" %s\n", "-r, --rss=RSS");
753 printf (" %s\n", _("Only scan for processes with rss higher than indicated."));
754 printf (" %s\n", "-P, --pcpu=PCPU");
755 printf (" %s\n", _("Only scan for processes with pcpu higher than indicated."));
756 printf (" %s\n", "-u, --user=USER");
757 printf (" %s\n", _("Only scan for processes with user name or ID indicated."));
758 printf (" %s\n", "-a, --argument-array=STRING");
759 printf (" %s\n", _("Only scan for processes with args that contain STRING."));
760 printf (" %s\n", "--ereg-argument-array=STRING");
761 printf (" %s\n", _("Only scan for processes with args that contain the regex STRING."));
762 printf (" %s\n", "-C, --command=COMMAND");
763 printf (" %s\n", _("Only scan for exact matches of COMMAND (without path)."));
766 RANGEs are specified 'min:max' or 'min:' or ':max' (or 'max'). If\n\
767 specified 'max:min', a warning status will be generated if the\n\
768 count is inside the specified range\n\n"));
771 This plugin checks the number of currently running processes and\n\
772 generates WARNING or CRITICAL states if the process count is outside\n\
773 the specified threshold ranges. The process count can be filtered by\n\
774 process owner, parent process PID, current state (e.g., 'Z'), or may\n\
775 be the total number of running processes\n\n"));
778 printf ("%s\n", _("Notes:"));
779 printf (_(UT_EXTRA_OPTS_NOTES
));
783 printf ("%s\n", _("Examples:"));
784 printf (" %s\n", "check_procs -w 2:2 -c 2:1024 -C portsentry");
785 printf (" %s\n", _("Warning if not two processes with command name portsentry."));
786 printf (" %s\n\n", _("Critical if < 2 or > 1024 processes"));
787 printf (" %s\n", "check_procs -w 10 -a '/usr/local/bin/perl' -u root");
788 printf (" %s\n", _("Warning alert if > 10 processes with command arguments containing"));
789 printf (" %s\n\n", _("'/usr/local/bin/perl' and owned by root"));
790 printf (" %s\n", "check_procs -w 50000 -c 100000 --metric=VSZ");
791 printf (" %s\n\n", _("Alert if vsz of any processes over 50K or 100K"));
792 printf (" %s\n", "check_procs -w 10 -c 20 --metric=CPU");
793 printf (" %s\n", _("Alert if cpu of any processes over 10%% or 20%%"));
795 printf (_(UT_SUPPORT
));
/*
 * Prints the short usage synopsis, with progname as the command name.
 * NOTE(review): the function header is not visible in this listing;
 * presumably this is the body of print_usage. The synopsis omits the
 * --ereg-argument-array and --input-file long options that the option
 * table in process_arguments registers.
 */
801 printf (_("Usage: "));
802 printf ("%s -w <range> -c <range> [-m metric] [-s state] [-p ppid]\n", progname
);
803 printf (" [-u user] [-r rss] [-z vsz] [-P %%cpu] [-a argument-array]\n");
804 printf (" [-C command] [-t timeout] [-v]\n");