1 /******************************************************************************
3 * Nagios check_procs plugin
6 * Copyright (c) 1999-2006 nagios-plugins team
8 * Last Modified: $Date$
12 * This file contains the check_procs plugin
14 * Checks all processes and generates WARNING or CRITICAL states if the specified
15 * metric is outside the required threshold ranges. The metric defaults to number
16 * of processes. Search filters can be applied to limit the processes to check.
18 * License Information:
20 * This program is free software; you can redistribute it and/or modify
21 * it under the terms of the GNU General Public License as published by
22 * the Free Software Foundation; either version 2 of the License, or
23 * (at your option) any later version.
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
30 * You should have received a copy of the GNU General Public License
31 * along with this program; if not, write to the Free Software
32 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
36 ******************************************************************************/
/* Plugin identity strings: program name, revision keyword, copyright years
 * and contact address. */
38 const char *progname
= "check_procs";
39 const char *program_name
= "check_procs"; /* Required for coreutils libs */
40 const char *revision
= "$Revision$";
41 const char *copyright
= "2000-2006";
42 const char *email
= "nagiosplug-devel@lists.sourceforge.net";
/* Prototypes for the helpers that appear later in this file. */
50 int process_arguments (int, char **);
51 int validate_arguments (void);
52 int check_thresholds (int);
53 int convert_to_seconds (char *);
54 void print_help (void);
55 void print_usage (void);
62 int options
= 0; /* bitmask of filter criteria to test against */
73 /* Different metrics */
/* Metric the thresholds are checked against; starts as the process count. */
82 enum metric metric
= METRIC_PROCS
;
/* Scratch buffer; used as the %[...] scan target when validating numeric
 * option arguments in process_arguments. */
95 char tmp
[MAX_INPUT_BUFFER
];
/*
 * Program entry point.
 *
 * Parses the command line, runs PS_COMMAND through spopen(), scans each
 * output line with PS_FORMAT/PS_VARLIST, applies the filters selected in
 * `options`, checks the chosen metric against the thresholds and prints a
 * one-line status (OK / WARNING / CRITICAL) with the process count.
 *
 * NOTE(review): this view of the file is missing lines (braces, some
 * statement bodies, the final return), so only what is visible is described.
 */
100 main (int argc
, char **argv
)
115 char procetime
[MAX_INPUT_BUFFER
] = { '\0' };
118 const char *zombie
= "Z";
120 int resultsum
= 0; /* bitmask of the filter criteria met by a process */
121 int found
= 0; /* counter for number of lines returned in `ps` output */
122 int procs
= 0; /* counter for number of processes meeting filter criteria */
123 int pos
; /* number of spaces before 'args' in `ps` output */
124 int cols
; /* number of columns in ps output */
125 int expected_cols
= PS_COLS
- 1;
126 int warn
= 0; /* number of processes in warn state */
127 int crit
= 0; /* number of processes in crit state */
129 int result
= STATE_UNKNOWN
;
/* Locale setup; LC_NUMERIC is then forced to "POSIX". */
131 setlocale (LC_ALL
, "");
132 bindtextdomain (PACKAGE
, LOCALEDIR
);
133 textdomain (PACKAGE
);
134 setlocale(LC_NUMERIC
, "POSIX");
/* NOTE(review): neither malloc result is checked in the visible lines. */
136 input_buffer
= malloc (MAX_INPUT_BUFFER
);
137 procprog
= malloc (MAX_INPUT_BUFFER
);
/* Default metric is the process count; -m may override it. */
139 asprintf (&metric_name
, "PROCS");
140 metric
= METRIC_PROCS
;
142 if (process_arguments (argc
, argv
) == ERROR
)
143 usage4 (_("Could not parse arguments"));
148 /* Set signal handling and alarm timeout */
149 if (signal (SIGALRM
, popen_timeout_alarm_handler
) == SIG_ERR
) {
150 usage4 (_("Cannot catch SIGALRM"));
152 alarm (timeout_interval
);
155 printf (_("CMD: %s\n"), PS_COMMAND
);
/* Start the ps command; bail out with STATE_UNKNOWN if the pipe fails. */
157 child_process
= spopen (PS_COMMAND
);
158 if (child_process
== NULL
) {
159 printf (_("Could not open pipe: %s\n"), PS_COMMAND
);
160 return STATE_UNKNOWN
;
163 child_stderr
= fdopen (child_stderr_array
[fileno (child_process
)], "r");
164 if (child_stderr
== NULL
)
165 printf (_("Could not open stderr for %s\n"), PS_COMMAND
);
167 /* flush first line */
/* NOTE(review): the fgets result is not checked before input_buffer is
 * indexed with strlen()-1; an empty read would index before the buffer. */
168 fgets (input_buffer
, MAX_INPUT_BUFFER
- 1, child_process
);
169 while ( input_buffer
[strlen(input_buffer
)-1] != '\n' )
170 fgets (input_buffer
, MAX_INPUT_BUFFER
- 1, child_process
);
/* One iteration per ps output line; lines longer than the buffer are
 * accumulated into input_line until a newline is seen. */
172 while (fgets (input_buffer
, MAX_INPUT_BUFFER
- 1, child_process
)) {
173 asprintf (&input_line
, "%s", input_buffer
);
174 while ( input_buffer
[strlen(input_buffer
)-1] != '\n' ) {
175 fgets (input_buffer
, MAX_INPUT_BUFFER
- 1, child_process
);
/* NOTE(review): input_line is both an argument and the destination, so the
 * previous allocation is dropped without a visible free. */
176 asprintf (&input_line
, "%s%s", input_line
, input_buffer
);
180 printf ("%s", input_line
);
182 strcpy (procprog
, "");
183 asprintf (&procargs
, "%s", "");
/* cols is the number of fields sscanf managed to convert. */
185 cols
= sscanf (input_line
, PS_FORMAT
, PS_VARLIST
);
187 /* Zombie processes do not give a procprog command */
188 if ( cols
< expected_cols
&& strstr(procstat
, zombie
) ) {
189 cols
= expected_cols
;
191 if ( cols
>= expected_cols
) {
/* The argument string is whatever follows offset pos in the raw line. */
193 asprintf (&procargs
, "%s", input_line
+ pos
);
196 /* Some ps return full pathname for command. This removes path */
/* NOTE(review): presumably base_name() returns a pointer into procprog; if
 * so source and destination overlap, which strcpy does not permit - verify. */
197 strcpy(procprog
, base_name(procprog
));
199 /* we need to convert the elapsed time to seconds */
200 procseconds
= convert_to_seconds(procetime
);
203 printf ("proc#=%d uid=%d vsz=%d rss=%d pid=%d ppid=%d pcpu=%.2f stat=%s etime=%s prog=%s args=%s\n",
204 procs
, procuid
, procvsz
, procrss
,
205 procpid
, procppid
, procpcpu
, procstat
,
206 procetime
, procprog
, procargs
);
/* Skip the line whose pid equals mypid (presumably this plugin's own pid). */
209 if (mypid
== procpid
) continue;
/* Filter tests: one per bit in options. The statement run on a match is not
 * visible in this view. */
211 if ((options
& STAT
) && (strstr (statopts
, procstat
)))
213 if ((options
& ARGS
) && procargs
&& (strstr (procargs
, args
) != NULL
))
215 if ((options
& PROG
) && procprog
&& (strcmp (prog
, procprog
) == 0))
217 if ((options
& PPID
) && (procppid
== ppid
))
219 if ((options
& USER
) && (procuid
== uid
))
221 if ((options
& VSZ
) && (procvsz
>= vsz
))
223 if ((options
& RSS
) && (procrss
>= rss
))
225 if ((options
& PCPU
) && (procpcpu
>= pcpu
))
230 /* Next line if filters not matched */
231 if (!(options
== resultsum
|| options
== ALL
))
/* Per-process threshold check for the non-count metrics. */
236 if (metric
== METRIC_VSZ
)
237 i
= check_thresholds (procvsz
);
238 else if (metric
== METRIC_RSS
)
239 i
= check_thresholds (procrss
);
240 /* TODO? float thresholds for --metric=CPU */
241 else if (metric
== METRIC_CPU
)
242 i
= check_thresholds ((int)procpcpu
);
243 else if (metric
== METRIC_ELAPSED
)
244 i
= check_thresholds (procseconds
);
/* For per-process metrics, append the offending program name to `fails`
 * (comma separated) and fold the state into result. */
246 if (metric
!= METRIC_PROCS
) {
247 if (i
== STATE_WARNING
) {
249 asprintf (&fails
, "%s%s%s", fails
, (strcmp(fails
,"") ? ", " : ""), procprog
);
250 result
= max_state (result
, i
);
252 if (i
== STATE_CRITICAL
) {
254 asprintf (&fails
, "%s%s%s", fails
, (strcmp(fails
,"") ? ", " : ""), procprog
);
255 result
= max_state (result
, i
);
259 /* This should not happen */
261 printf(_("Not parseable: %s"), input_buffer
);
265 /* If we get anything on STDERR, at least set warning */
266 while (fgets (input_buffer
, MAX_INPUT_BUFFER
- 1, child_stderr
)) {
268 printf ("STDERR: %s", input_buffer
);
269 result
= max_state (result
, STATE_WARNING
);
270 printf (_("System call sent warnings to stderr\n"));
273 (void) fclose (child_stderr
);
/* A non-zero result from spclose() also raises at least a warning. */
276 if (spclose (child_process
)) {
277 printf (_("System call returned nonzero status\n"));
278 result
= max_state (result
, STATE_WARNING
);
281 if (found
== 0) { /* no process lines parsed so return STATE_UNKNOWN */
282 printf (_("Unable to read output\n"));
286 if ( result
== STATE_UNKNOWN
)
289 /* Needed if procs found, but none match filter */
290 if ( metric
== METRIC_PROCS
) {
291 result
= max_state (result
, check_thresholds (procs
) );
/* Status line: "<metric> <STATE>: ", optional warn/crit counts for
 * per-process metrics, then the process count, filter description and
 * (when verbose) the list of failing programs. */
294 if ( result
== STATE_OK
) {
295 printf ("%s %s: ", metric_name
, _("OK"));
296 } else if (result
== STATE_WARNING
) {
297 printf ("%s %s: ", metric_name
, _("WARNING"));
298 if ( metric
!= METRIC_PROCS
) {
299 printf (_("%d warn out of "), warn
);
301 } else if (result
== STATE_CRITICAL
) {
302 printf ("%s %s: ", metric_name
, _("CRITICAL"));
303 if (metric
!= METRIC_PROCS
) {
304 printf (_("%d crit, %d warn out of "), crit
, warn
);
307 printf (ngettext ("%d process", "%d processes", (unsigned long) procs
), procs
);
309 if (strcmp(fmt
,"") != 0) {
310 printf (_(" with %s"), fmt
);
313 if ( verbose
>= 1 && strcmp(fails
,"") )
314 printf (" [%s]", fails
);
322 /* process command-line arguments */
/*
 * Walks argv with getopt_long(), filling the threshold globals
 * (wmin/wmax/cmin/cmax), the filter values (ppid, uid, prog, args, vsz, rss,
 * pcpu, statopts), the metric selection and the human-readable filter
 * description `fmt`. Finishes by returning validate_arguments().
 *
 * NOTE(review): several lines of this function (braces, break statements,
 * some assignments) are not visible in this view.
 */
324 process_arguments (int argc
, char **argv
)
330 static struct option longopts
[] = {
331 {"warning", required_argument
, 0, 'w'},
332 {"critical", required_argument
, 0, 'c'},
333 {"metric", required_argument
, 0, 'm'},
334 {"timeout", required_argument
, 0, 't'},
335 {"status", required_argument
, 0, 's'},
336 {"ppid", required_argument
, 0, 'p'},
337 {"command", required_argument
, 0, 'C'},
338 {"vsz", required_argument
, 0, 'z'},
339 {"rss", required_argument
, 0, 'r'},
340 {"pcpu", required_argument
, 0, 'P'},
341 {"elapsed", required_argument
, 0, 'e'},
342 {"argument-array", required_argument
, 0, 'a'},
343 {"help", no_argument
, 0, 'h'},
344 {"version", no_argument
, 0, 'V'},
345 {"verbose", no_argument
, 0, 'v'},
/* Rewrite any literal "-to" argument in place to "-t" before parsing. */
349 for (c
= 1; c
< argc
; c
++)
350 if (strcmp ("-to", argv
[c
]) == 0)
351 strcpy (argv
[c
], "-t");
/* NOTE(review): the short-option string has no 'e', although longopts maps
 * "elapsed" to 'e'; no `case 'e'` is visible in this view either. */
354 c
= getopt_long (argc
, argv
, "Vvht:c:w:p:s:u:C:a:z:r:m:P:",
357 if (c
== -1 || c
== EOF
)
366 case 'V': /* version */
367 print_revision (progname
, revision
);
369 case 't': /* timeout period */
370 if (!is_integer (optarg
))
371 usage2 (_("Timeout interval must be a positive integer"), optarg
);
373 timeout_interval
= atoi (optarg
);
/* Threshold forms accepted: "N", ":max", "min:max" and "min:". */
375 case 'c': /* critical threshold */
376 if (is_integer (optarg
))
377 cmax
= atoi (optarg
);
378 else if (sscanf (optarg
, ":%d", &cmax
) == 1)
380 else if (sscanf (optarg
, "%d:%d", &cmin
, &cmax
) == 2)
382 else if (sscanf (optarg
, "%d:", &cmin
) == 1)
385 usage4 (_("Critical Process Count must be an integer!"));
387 case 'w': /* warning threshold */
388 if (is_integer (optarg
))
389 wmax
= atoi (optarg
);
390 else if (sscanf (optarg
, ":%d", &wmax
) == 1)
392 else if (sscanf (optarg
, "%d:%d", &wmin
, &wmax
) == 2)
394 else if (sscanf (optarg
, "%d:", &wmin
) == 1)
397 usage4 (_("Warning Process Count must be an integer!"));
/* The "%d%[^0-9]" scans return 1 only when nothing non-numeric follows the
 * integer. NOTE(review): %[ has no field width, so a long argument could
 * overrun tmp (MAX_INPUT_BUFFER bytes). */
399 case 'p': /* parent process id */
400 if (sscanf (optarg
, "%d%[^0-9]", &ppid
, tmp
) == 1) {
401 asprintf (&fmt
, "%s%sPPID = %d", (fmt
? fmt
: "") , (options
? ", " : ""), ppid
);
405 usage4 (_("Parent Process ID must be an integer!"));
406 case 's': /* status */
411 asprintf (&fmt
, _("%s%sSTATE = %s"), (fmt
? fmt
: ""), (options
? ", " : ""), statopts
);
/* Numeric arguments are looked up with getpwuid(), anything else with
 * getpwnam(). */
414 case 'u': /* user or user id */
415 if (is_integer (optarg
)) {
417 pw
= getpwuid ((uid_t
) uid
);
418 /* check to be sure user exists */
420 usage2 (_("UID was not found"), optarg
);
423 pw
= getpwnam (optarg
);
424 /* check to be sure user exists */
426 usage2 (_("User name was not found"), optarg
);
431 asprintf (&fmt
, "%s%sUID = %d (%s)", (fmt
? fmt
: ""), (options
? ", " : ""),
435 case 'C': /* command */
436 /* TODO: allow this to be passed in with --metric */
441 asprintf (&fmt
, _("%s%scommand name '%s'"), (fmt
? fmt
: ""), (options
? ", " : ""),
445 case 'a': /* args (full path name with args) */
446 /* TODO: allow this to be passed in with --metric */
451 asprintf (&fmt
, "%s%sargs '%s'", (fmt
? fmt
: ""), (options
? ", " : ""), args
);
/* RSS, VSZ and PCPU filter values follow; their `case` labels are not
 * visible in this view. */
455 if (sscanf (optarg
, "%d%[^0-9]", &rss
, tmp
) == 1) {
456 asprintf (&fmt
, "%s%sRSS >= %d", (fmt
? fmt
: ""), (options
? ", " : ""), rss
);
460 usage4 (_("RSS must be an integer!"));
462 if (sscanf (optarg
, "%d%[^0-9]", &vsz
, tmp
) == 1) {
463 asprintf (&fmt
, "%s%sVSZ >= %d", (fmt
? fmt
: ""), (options
? ", " : ""), vsz
);
467 usage4 (_("VSZ must be an integer!"));
469 /* TODO: -P 1.5.5 is accepted */
470 if (sscanf (optarg
, "%f%[^0-9.]", &pcpu
, tmp
) == 1) {
471 asprintf (&fmt
, "%s%sPCPU >= %.2f", (fmt
? fmt
: ""), (options
? ", " : ""), pcpu
);
475 usage4 (_("PCPU must be a float!"));
/* Metric selection: metric_name takes the argument text, then the argument
 * is matched against the known metric names. */
477 asprintf (&metric_name
, "%s", optarg
);
478 if ( strcmp(optarg
, "PROCS") == 0) {
479 metric
= METRIC_PROCS
;
482 else if ( strcmp(optarg
, "VSZ") == 0) {
486 else if ( strcmp(optarg
, "RSS") == 0 ) {
490 else if ( strcmp(optarg
, "CPU") == 0 ) {
494 else if ( strcmp(optarg
, "ELAPSED") == 0) {
495 metric
= METRIC_ELAPSED
;
499 usage4 (_("Metric must be one of PROCS, VSZ, RSS, CPU, ELAPSED!"));
500 case 'v': /* verbose */
/* Positional fallback: thresholds still at -1 and an unset statopts are
 * taken from the next remaining argv entries, in that order. */
507 if (wmax
== -1 && argv
[c
])
508 wmax
= atoi (argv
[c
++]);
509 if (cmax
== -1 && argv
[c
])
510 cmax
= atoi (argv
[c
++]);
511 if (statopts
== NULL
&& argv
[c
]) {
512 asprintf (&statopts
, "%s", argv
[c
++]);
513 asprintf (&fmt
, _("%s%sSTATE = %s"), (fmt
? fmt
: ""), (options
? ", " : ""), statopts
);
517 return validate_arguments ();
/*
 * Cross-checks the threshold globals wmin/wmax/cmin/cmax after option
 * parsing and prints a message when the warning and critical bounds are
 * inconsistent. Also assigns an empty string to statopts.
 *
 * NOTE(review): the bodies of the first two ifs, the condition guarding the
 * statopts assignment and all return statements are not visible in this view.
 */
523 validate_arguments ()
/* A max was given without a min. */
526 if (wmax
>= 0 && wmin
== -1)
528 if (cmax
>= 0 && cmin
== -1)
530 if (wmax
>= wmin
&& cmax
>= cmin
) { /* standard ranges */
531 if (wmax
> cmax
&& cmax
!= -1) {
532 printf (_("wmax (%d) cannot be greater than cmax (%d)\n"), wmax
, cmax
);
535 if (cmin
> wmin
&& wmin
!= -1) {
536 printf (_("wmin (%d) cannot be less than cmin (%d)\n"), wmin
, cmin
);
541 /* if (wmax == -1 && cmax == -1 && wmin == -1 && cmin == -1) { */
542 /* printf ("At least one threshold must be set\n"); */
550 statopts
= strdup("");
569 /* Check thresholds against value */
/*
 * The critical bounds are tested first, then the warning bounds.
 *
 * A bound of -1 means "not set". When both bounds of a pair are set and
 * max < min, the range is inverted: the state is raised only when value lies
 * strictly between max and min. Otherwise the state is raised when value is
 * above max or below min.
 *
 * Visible returns are STATE_CRITICAL and STATE_WARNING; the returns for the
 * "no thresholds set" branch and the fall-through case are not visible here.
 */
571 check_thresholds (int value
)
573 if (wmax
== -1 && cmax
== -1 && wmin
== -1 && cmin
== -1) {
/* Inverted critical range (cmax < cmin). */
576 else if (cmax
>= 0 && cmin
>= 0 && cmax
< cmin
) {
577 if (value
> cmax
&& value
< cmin
)
578 return STATE_CRITICAL
;
580 else if (cmax
>= 0 && value
> cmax
) {
581 return STATE_CRITICAL
;
583 else if (cmin
>= 0 && value
< cmin
) {
584 return STATE_CRITICAL
;
/* Inverted warning range (wmax < wmin). */
587 if (wmax
>= 0 && wmin
>= 0 && wmax
< wmin
) {
588 if (value
> wmax
&& value
< wmin
) {
589 return STATE_WARNING
;
592 else if (wmax
>= 0 && value
> wmax
) {
593 return STATE_WARNING
;
595 else if (wmin
>= 0 && value
< wmin
) {
596 return STATE_WARNING
;
602 /* convert the elapsed time to seconds */
/*
 * Parses an elapsed-time string with sscanf using one of three layouts:
 * "days-hours:minutes:seconds", "hours:minutes:seconds" or
 * "minutes:seconds", then combines the fields into `total`
 * (days contribute 86400 seconds each).
 *
 * NOTE(review): the loop body, the branch selecting the first two layouts
 * and the rest of the `total` expression are not visible in this view;
 * presumably the loop counts '-' and ':' characters to pick the layout.
 */
604 convert_to_seconds(char *etime
) {
623 for (ptr
= etime
; *ptr
!= '\0'; ptr
++) {
636 sscanf(etime
, "%d-%d:%d:%d",
637 &days
, &hours
, &minutes
, &seconds
);
638 /* linux 2.6.5/2.6.6 reporting some processes with infinite
639 * elapsed times for some reason */
645 sscanf(etime
, "%d:%d:%d",
646 &hours
, &minutes
, &seconds
);
647 } else if (coloncnt
== 1) {
648 sscanf(etime
, "%d:%d",
653 total
= (days
* 86400) +
/* Debug trace, only at verbosity 3+ and when the ELAPSED metric is in use. */
658 if (verbose
>= 3 && metric
== METRIC_ELAPSED
) {
659 printf("seconds: %d\n", total
);
/*
 * Help output: revision banner, copyright, a description of the plugin, the
 * threshold and filter options, range syntax notes and usage examples.
 * (The enclosing function header is not visible in this view.)
 */
668 print_revision (progname
, revision
);
670 printf ("Copyright (c) 1999 Ethan Galstad <nagios@nagios.org>");
671 printf (COPYRIGHT
, copyright
, email
);
673 printf ("%s\n", _("Checks all processes and generates WARNING or CRITICAL states if the specified"));
674 printf ("%s\n", _("metric is outside the required threshold ranges. The metric defaults to number"));
675 printf ("%s\n", _("of processes. Search filters can be applied to limit the processes to check."));
681 printf ("%s\n", _("Required Arguments:"));
682 printf (" %s\n", "-w, --warning=RANGE");
683 printf (" %s\n", _("Generate warning state if metric is outside this range"));
684 printf (" %s\n", "-c, --critical=RANGE");
685 printf (" %s\n", _("Generate critical state if metric is outside this range"));
687 printf ("%s\n", _("Optional Arguments:"));
688 printf (" %s\n", "-m, --metric=TYPE");
689 printf (" %s\n", _("Check thresholds against metric. Valid types:"));
690 printf (" %s\n", _("PROCS - number of processes (default)"));
691 printf (" %s\n", _("VSZ - virtual memory size"));
692 printf (" %s\n", _("RSS - resident set memory size"));
693 printf (" %s\n", _("CPU - percentage cpu"));
694 /* only linux etime is supported currently */
695 #if defined( __linux__ )
696 printf (" %s\n", _("ELAPSED - time elapsed in seconds"));
697 #endif /* defined(__linux__) */
698 printf (_(UT_TIMEOUT
), DEFAULT_SOCKET_TIMEOUT
);
700 printf (" %s\n", "-v, --verbose");
701 printf (" %s\n", _("Extra information. Up to 3 verbosity levels"));
703 printf ("%s\n", "Optional Filters:");
/* NOTE(review): the help text advertises --state, but the long option table
 * in process_arguments registers this option as "status". */
704 printf (" %s\n", "-s, --state=STATUSFLAGS");
705 printf (" %s\n", _("Only scan for processes that have, in the output of `ps`, one or"));
706 printf (" %s\n", _("more of the status flags you specify (for example R, Z, S, RS,"));
707 printf (" %s\n", _("RSZDT, plus others based on the output of your 'ps' command)."));
708 printf (" %s\n", "-p, --ppid=PPID");
709 printf (" %s\n", _("Only scan for children of the parent process ID indicated."));
710 printf (" %s\n", "-z, --vsz=VSZ");
711 printf (" %s\n", _("Only scan for processes with vsz higher than indicated."));
712 printf (" %s\n", "-r, --rss=RSS");
713 printf (" %s\n", _("Only scan for processes with rss higher than indicated."));
714 printf (" %s\n", "-P, --pcpu=PCPU");
715 printf (" %s\n", _("Only scan for processes with pcpu higher than indicated."));
716 printf (" %s\n", "-u, --user=USER");
717 printf (" %s\n", _("Only scan for processes with user name or ID indicated."));
718 printf (" %s\n", "-a, --argument-array=STRING");
719 printf (" %s\n", _("Only scan for processes with args that contain STRING."));
720 printf (" %s\n", "-C, --command=COMMAND");
721 printf (" %s\n", _("Only scan for exact matches of COMMAND (without path)."));
724 RANGEs are specified 'min:max' or 'min:' or ':max' (or 'max'). If\n\
725 specified 'max:min', a warning status will be generated if the\n\
726 count is inside the specified range\n\n"));
729 This plugin checks the number of currently running processes and\n\
730 generates WARNING or CRITICAL states if the process count is outside\n\
731 the specified threshold ranges. The process count can be filtered by\n\
732 process owner, parent process PID, current state (e.g., 'Z'), or may\n\
733 be the total number of running processes\n\n"));
735 printf ("%s\n", _("Examples:"));
736 printf (" %s\n", "check_procs -w 2:2 -c 2:1024 -C portsentry");
737 printf (" %s\n", _("Warning if not two processes with command name portsentry."));
738 printf (" %s\n\n", _("Critical if < 2 or > 1024 processes"));
739 printf (" %s\n", "check_procs -w 10 -a '/usr/local/bin/perl' -u root");
740 printf (" %s\n", _("Warning alert if > 10 processes with command arguments containing"));
741 printf (" %s\n\n", _("'/usr/local/bin/perl' and owned by root"));
742 printf (" %s\n", "check_procs -w 50000 -c 100000 --metric=VSZ");
743 printf (" %s\n\n", _("Alert if vsz of any processes over 50K or 100K"));
744 printf (" %s\n", "check_procs -w 10 -c 20 --metric=CPU");
745 printf (" %s\n\n", _("Alert if cpu of any processes over 10%% or 20%%"));
747 printf (_(UT_SUPPORT
));
/* Usage synopsis: prints "Usage: " followed by the program name and the
 * accepted options. (The enclosing function header is not visible in this
 * view.) */
753 printf (_("Usage: "));
754 printf ("%s -w <range> -c <range> [-m metric] [-s state] [-p ppid]\n", progname
);
755 printf (" [-u user] [-r rss] [-z vsz] [-P %%cpu] [-a argument-array]\n");
756 printf (" [-C command] [-t timeout] [-v]\n");