showlog: Remove downtime handling code
[nagios-reports-module.git] / showlog.c
blob36d6a654815967ac0d46d02f10d745fa4067653c
1 #define _GNU_SOURCE 1
2 #include <stdio.h>
3 #include <unistd.h>
4 #include <sys/types.h>
5 #include <fcntl.h>
6 #include <stdarg.h>
7 #include <errno.h>
8 #include <string.h>
9 #include <signal.h>
11 #include <nagios/broker.h>
12 #include <nagios/nebcallbacks.h>
13 #include "hooks.h"
14 #include "logging.h"
15 #include "hash.h"
16 #include "lparse.h"
/* code given to log-line prefixes that are recognised but skipped */
#define IGNORE_LINE 0

/* added to a NEBTYPE_* value to tell host and service variants apart */
#define CONCERNS_HOST 50
#define CONCERNS_SERVICE 60

#define MAX_NVECS 16         /* number of slots allocated for strv */
#define HASH_TABLE_SIZE 128

/* for some reason these aren't defined inside Nagios' headers */
#define SERVICE_OK 0
#define SERVICE_WARNING 1
#define SERVICE_CRITICAL 2
#define SERVICE_UNKNOWN 3
/* One logfile named on the command line. */
struct naglog_file {
	time_t first;	/* timestamp parsed from the top of the file */
	char *path;	/* path exactly as passed in argv (not copied) */
	size_t size;	/* size in bytes as reported by fstat() */
	size_t cmp;	/* ordering key derived from the file name */
};
/* running totals */
static size_t imported;		/* bytes consumed by parse_line() so far */
static size_t totsize;		/* summed size of all files to read */
static size_t totlines;		/* summed line count of the files read */
struct timeval import_start;	/* wall-clock time when reading started */

static int debug_level;		/* bumped once per --debug / -d */
static char **strv;		/* field vector filled by vectorize_string() */

/* daemon state as tracked by parse_line() */
static time_t daemon_start;
static time_t daemon_stop;
static time_t incremental;	/* lines older than this are skipped */
static int daemon_is_running;

static int num_nfile;		/* number of logfiles collected from argv */
struct naglog_file *cur_file; /* the file we're currently importing */
static int line_no;		/* current line number within cur_file */
static int ignore_process_events;

static time_t first_time, last_time; /* first and last timestamp to show */
static time_t ltime; /* the timestamp from the current log-line */
/*
 * Maps a fixed string found in a log line to a numeric code.
 * Tables of these are terminated by an entry whose str is NULL.
 */
struct string_code {
	int nvecs;		/* number of ';'-separated fields expected */
	const char *str;	/* the string to match */
	size_t len;		/* strlen(str), precomputed */
	int code;		/* code handed back on a match */
};
/*
 * Book-keeping for one scheduled downtime.
 * NOTE(review): nothing in the visible part of this file uses this
 * struct any more; the per-field notes below are inferred from the
 * names only and should be confirmed before relying on them.
 */
struct downtime_entry {
	int id;
	int code;
	char *host;
	char *service;		/* presumably NULL for host downtimes */
	time_t start;
	time_t stop;
	int fixed;
	time_t duration;
	time_t started;
	time_t ended;
	int purged;
	int trigger;
	int slot;
	struct downtime_entry *next;	/* singly linked list */
};
77 #define add_code(n, s, c) { n, s, sizeof(s) - 1, c, }
78 #define add_ignored(s) add_code(0, s, IGNORE_LINE)
79 struct string_code event_codes[] = {
80 add_ignored("Error"),
81 add_ignored("Warning"),
82 add_ignored("LOG ROTATION"),
83 add_ignored("HOST NOTIFICATION"),
84 add_ignored("HOST FLAPPING ALERT"),
85 add_ignored("SERVICE NOTIFICATION"),
86 add_ignored("SERVICE FLAPPING ALERT"),
87 add_ignored("SERVICE EVENT HANDLER"),
88 add_ignored("HOST EVENT HANDLER"),
89 add_ignored("LOG VERSION"),
91 add_code(0, "EXTERNAL COMMAND", NEBTYPE_EXTERNALCOMMAND_END),
92 add_code(5, "HOST ALERT", NEBTYPE_HOSTCHECK_PROCESSED),
93 add_code(5, "INITIAL HOST STATE", NEBTYPE_HOSTCHECK_PROCESSED),
94 add_code(5, "CURRENT HOST STATE", NEBTYPE_HOSTCHECK_PROCESSED),
95 add_code(6, "SERVICE ALERT", NEBTYPE_SERVICECHECK_PROCESSED),
96 add_code(6, "INITIAL SERVICE STATE", NEBTYPE_SERVICECHECK_PROCESSED),
97 add_code(6, "CURRENT SERVICE STATE", NEBTYPE_SERVICECHECK_PROCESSED),
98 add_code(3, "HOST DOWNTIME ALERT", NEBTYPE_DOWNTIME_LOAD + CONCERNS_HOST),
99 add_code(4, "SERVICE DOWNTIME ALERT", NEBTYPE_DOWNTIME_LOAD + CONCERNS_SERVICE),
100 { 0, NULL, 0, 0 },
103 #define DEL_HOST_DOWNTIME 1
104 #define DEL_SVC_DOWNTIME 2
105 #define SCHEDULE_AND_PROPAGATE_HOST_DOWNTIME 3
106 #define SCHEDULE_AND_PROPAGATE_TRIGGERED_HOST_DOWNTIME 4
107 #define SCHEDULE_HOSTGROUP_HOST_DOWNTIME 5
108 #define SCHEDULE_HOSTGROUP_SVC_DOWNTIME 6
109 #define SCHEDULE_HOST_DOWNTIME 7
110 #define SCHEDULE_HOST_SVC_DOWNTIME 8
111 #define SCHEDULE_SERVICEGROUP_HOST_DOWNTIME 9
112 #define SCHEDULE_SERVICEGROUP_SVC_DOWNTIME 10
113 #define SCHEDULE_SVC_DOWNTIME 11
114 #define ACKNOWLEDGE_HOST_PROBLEM 12
115 #define ACKNOWLEDGE_SVC_PROBLEM 13
117 #define add_cdef(__nvecs, __define) add_code(__nvecs, #__define, __define)
118 struct string_code command_codes[] = {
119 add_cdef(1, DEL_HOST_DOWNTIME),
120 add_cdef(1, DEL_SVC_DOWNTIME),
121 add_cdef(8, SCHEDULE_AND_PROPAGATE_HOST_DOWNTIME),
122 add_cdef(8, SCHEDULE_AND_PROPAGATE_TRIGGERED_HOST_DOWNTIME),
123 add_cdef(8, SCHEDULE_HOSTGROUP_HOST_DOWNTIME),
124 add_cdef(8, SCHEDULE_HOSTGROUP_SVC_DOWNTIME),
125 add_cdef(8, SCHEDULE_HOST_DOWNTIME),
126 add_cdef(8, SCHEDULE_HOST_SVC_DOWNTIME),
127 add_cdef(8, SCHEDULE_SERVICEGROUP_HOST_DOWNTIME),
128 add_cdef(8, SCHEDULE_SERVICEGROUP_SVC_DOWNTIME),
129 add_cdef(8, SCHEDULE_SVC_DOWNTIME),
132 * These really have one more field than listed here. We omit one
133 * to make author and comment concatenated with a semi-colon by default.
135 add_cdef(6, ACKNOWLEDGE_SVC_PROBLEM),
136 add_cdef(5, ACKNOWLEDGE_HOST_PROBLEM),
137 { 0, NULL, 0, 0 },
/* Debug helper: print the first n strings of v, one per line, to stdout. */
static inline void print_strvec(char **v, int n)
{
	int idx = 0;

	while (idx < n) {
		printf("v[%2d]: %s\n", idx, v[idx]);
		idx++;
	}
}
/*
 * Formats a byte count for humans ("512 bytes", "2.00 KiB", ...).
 *
 * Returns a pointer to one of two static buffers that are used in
 * turn, so the results of two consecutive calls can be used in the
 * same printf(). Not reentrant.
 *
 * A unit is only stepped up once the value exceeds 1024 of the current
 * unit, so exactly 1 MiB is rendered as "1024.00 KiB".
 */
const char *tobytes(size_t n)
{
	static const char suffix[] = "KMGT";
	static char tbuf[2][30];
	static int t = 0;
	size_t shift = 1;
	double scaled;

	t ^= 1;
	if (n < 1024) {
		snprintf(tbuf[t], sizeof(tbuf[t]), "%zu bytes", n);
		return tbuf[t];
	}

	/* never step past the last suffix we have a letter for */
	while (shift < sizeof(suffix) - 1 && (n >> (shift * 10)) > 1024)
		shift++;

	/* the divisor needs more than 32 bits from GiB upwards */
	scaled = (double)n / (double)(1ULL << (shift * 10));
	snprintf(tbuf[t], sizeof(tbuf[t]), "%0.2f %ciB", scaled, suffix[shift - 1]);

	return tbuf[t];
}
171 static inline struct string_code *
172 get_string_code(struct string_code *codes, const char *str, size_t len)
174 int i;
176 for (i = 0; codes[i].str; i++)
177 if (codes[i].len == len && !memcmp(str, codes[i].str, len))
178 return &codes[i];
180 return NULL;
182 #define get_event_type(str, len) get_string_code(event_codes, str, len)
183 #define get_command_type(str, len) get_string_code(command_codes, str, len)
185 static void crash(const char *fmt, ...)
186 __attribute__((__format__(__printf__, 1, 2), __noreturn__));
188 static void __attribute__((__noreturn__)) crash(const char *fmt, ...)
190 va_list ap;
192 va_start(ap, fmt);
193 vfprintf(stderr, fmt, ap);
194 va_end(ap);
195 fputc('\n', stderr);
197 if (cur_file) {
198 fprintf(stderr, "crash() called when parsing line %d in %s\n",
199 line_no, cur_file->path);
202 exit(1);
205 static void pdebug(int lvl, const char *fmt, ...)
206 __attribute__((__format__(__printf__, 2, 3)));
207 #define debug(...) pdebug(1, __VA_ARGS__)
208 static void pdebug(int lvl, const char *fmt, ...)
210 va_list ap;
212 if (debug_level < lvl)
213 return;
215 va_start(ap, fmt);
216 vprintf(fmt, ap);
217 va_end(ap);
218 if (fmt[strlen(fmt) - 1] != '\n')
219 putchar('\n');
222 static void warn(const char *fmt, ...)
223 __attribute__((__format__(__printf__, 1, 2)));
225 static unsigned int warnings;
226 static void warn(const char *fmt, ...)
228 va_list ap;
230 warnings++;
232 if (!debug_level)
233 return;
235 printf("WARNING: ");
236 va_start(ap, fmt);
237 vprintf(fmt, ap);
238 va_end(ap);
239 putchar('\n');
/* like strcmp(), but only compares the first strlen(s2) bytes */
#define prefixcmp(s1, s2) strncmp(s1, s2, strlen(s2))

/*
 * Returns 0 for messages that start with one of a few known
 * uninteresting prefixes, 1 for everything else.
 */
static int is_interesting(const char *ptr)
{
	static const char *const boring[] = {
		"Auto-save of retention data",
		"Event broker module",
		"You do not have permission",
		"Local time is",
	};
	size_t i;

	for (i = 0; i < sizeof(boring) / sizeof(boring[0]); i++) {
		if (prefixcmp(ptr, boring[i]) == 0)
			return 0;
	}

	return 1;
}
/*
 * Returns 1 if the message looks like the daemon starting up: it
 * begins with one of the known prefixes or contains "starting...".
 * Returns 0 otherwise.
 */
static int is_start_event(const char *ptr)
{
	static const char *const prefixes[] = {
		"Finished daemonizing...",
		"PROGRAM_RESTART",
		"Caught SIGHUP",
	};
	size_t i;

	for (i = 0; i < sizeof(prefixes) / sizeof(prefixes[0]); i++) {
		if (strncmp(ptr, prefixes[i], strlen(prefixes[i])) == 0)
			return 1;
	}

	return strstr(ptr, "starting...") ? 1 : 0;
}
/*
 * Returns 1 if the message looks like the daemon stopping: it begins
 * with one of the known prefixes or contains "shutting down...".
 * Returns 0 otherwise.
 */
static int is_stop_event(const char *ptr)
{
	static const char *const prefixes[] = {
		"Caught SIGTERM",
		"Successfully shutdown...",
		"Bailing out",
		"Lockfile",
	};
	size_t i;

	for (i = 0; i < sizeof(prefixes) / sizeof(prefixes[0]); i++) {
		if (strncmp(ptr, prefixes[i], strlen(prefixes[i])) == 0)
			return 1;
	}

	return strstr(ptr, "shutting down...") ? 1 : 0;
}
/* A log line we did not know how to handle, kept for the final report. */
struct unhandled_event {
	char *file;			/* path of the logfile it came from */
	const char *line;		/* private copy of the offending line */
	unsigned line_no;		/* line number within that file */
	struct unhandled_event *next;	/* next (older) entry */
};

/* newest entry first */
static struct unhandled_event *event_list;
static int num_unhandled;
299 * This is a fairly toothless function, since we can encounter
300 * pretty much any kind of message in the logfiles. In order to
301 * make sure we don't miss anything important though, we should
302 * probably stash the messages away and print them at the end
303 * so the user can decide if he/she wants to make a re-import.
304 * In 99% of all cases, the user will just want to ignore the
305 * messages and keep going
307 static void handle_unknown_event(const char *line)
309 struct unhandled_event *event;
311 num_unhandled++;
313 if (!(event = malloc(sizeof(*event))) || !(event->line = strdup(line))) {
314 crash("Failed to allocate memory for unhandled event [%s]\n", line);
315 return;
318 event->line_no = line_no;
319 event->file = cur_file->path;
321 /* add to "top" of list. we'll print in reverse order */
322 event->next = event_list;
323 event_list = event;
326 static void print_unhandled_events()
328 struct unhandled_event *event;
329 int x = 1;
331 if (!num_unhandled)
332 return;
334 printf("\n%d Unhandled events encountered:\n" \
335 "------------------------------", num_unhandled);
337 for (x = 1; num_unhandled > (x * 10); x *= 10)
338 putchar('-');
340 putchar('\n');
341 for (event = event_list; event; event = event->next) {
342 printf("%s:%d:\n%s\n----\n", event->file, event->line_no, event->line);
346 static int vectorize_string(char *str, int nvecs)
348 char *p;
349 int i = 0;
351 strv[i++] = str;
352 for (p = str; *p && i < nvecs; p++) {
353 if (*p == ';') {
354 *p = 0;
355 strv[i++] = ++p;
359 return i;
362 static hash_table *interesting_hosts, *interesting_services;
363 static int host_is_interesting(const char *host)
365 if (interesting_hosts)
366 return !!hash_find(interesting_hosts, host);
368 return 1;
371 static int service_is_interesting(const char *host, const char *service)
373 /* fall back to just checking if host is interesting */
374 if (!service || !interesting_services)
375 return host_is_interesting(host);
377 return !!hash_find2(interesting_services, host, service);
/*
 * Parses a decimal number at the start of str into *val.
 * Returns 0 on success. If str does not start with a number, *val is
 * set to 0, a warning is issued and -1 is returned.
 */
static int strtotimet(const char *str, time_t *val)
{
	char *end = NULL;
	unsigned long parsed = strtoul(str, &end, 10);

	*val = parsed;
	if (end != str)
		return 0;

	warn("strtotimet(): %s is not a valid time_t\n", str);
	return -1;
}
393 static int parse_line(char *line, size_t len)
395 char *ptr, *colon;
396 int nvecs = 0;
397 struct string_code *sc;
398 static time_t last_ltime = 0;
400 line_no++;
401 imported += len + 1; /* make up for 1 lost byte per newline */
403 /* ignore empty lines. whitespace is trimmed in the cfg_* api */
404 if (!len)
405 return 0;
407 /* skip obviously bogus lines */
408 if (len < 12 || *line != '[') {
409 warn("line %d; len too short, or line doesn't start with '[' (%s)", line_no, line);
410 return -1;
413 ltime = strtoul(line + 1, &ptr, 10);
414 if (line + 1 == ptr) {
415 crash("Failed to parse log timestamp from '%s'. I can't handle malformed logdata", line);
416 return -1;
419 /* only print lines in the interesting interval */
420 if ((first_time && ltime < first_time) || (last_time && ltime > last_time))
421 return 0;
423 /* more heuristics should go below, but we remain lazy for now */
424 puts(line);
425 return 0;
427 if (ltime < last_ltime) {
428 ltime = last_ltime;
430 else
431 last_ltime = ltime;
434 * Incremental will be 0 if not set, or 1 if set but
435 * the database is currently empty.
436 * Note that this will not always do the correct thing,
437 * as downtime entries that might have been scheduled for
438 * purging may never show up as "stopped" in the database
439 * with this scheme. As such, incremental imports absolutely
440 * require that nothing is in scheduled downtime when the
441 * import is running (well, started really, but it amounts
442 * to the same thing).
444 if (ltime < incremental)
445 return 0;
447 while (*ptr == ']' || *ptr == ' ')
448 ptr++;
450 if (!is_interesting(ptr))
451 return 0;
453 if (!(colon = strchr(ptr, ':'))) {
454 /* stupid heuristic, but might be good for something,
455 * somewhere, sometime. if nothing else, it should suppress
456 * annoying output */
457 if (is_start_event(ptr)) {
458 daemon_start = ltime;
459 daemon_is_running = 1;
460 return 0;
462 if (is_stop_event(ptr)) {
463 daemon_is_running = 0;
464 daemon_stop = ltime;
465 return 0;
469 * An unhandled event. We should probably crash here
471 handle_unknown_event(line);
472 return -1;
475 /* an event happened without us having gotten a start-event */
476 if (!daemon_is_running) {
477 daemon_start = ltime;
478 daemon_is_running = 1;
481 if (!(sc = get_event_type(ptr, colon - ptr))) {
482 handle_unknown_event(line);
483 return -1;
486 if (sc->code == IGNORE_LINE)
487 return 0;
489 *colon = 0;
490 ptr = colon + 1;
491 while (*ptr == ' ')
492 ptr++;
494 if (sc->nvecs) {
495 int i;
497 nvecs = vectorize_string(ptr, sc->nvecs);
499 if (nvecs != sc->nvecs) {
500 /* broken line */
501 warn("Line %d in %s seems to not have all the fields it should",
502 line_no, cur_file->path);
503 return -1;
506 for (i = 0; i < sc->nvecs; i++) {
507 if (!strv[i]) {
508 /* this should never happen */
509 warn("Line %d in %s seems to be broken, or we failed to parse it into a vector",
510 line_no, cur_file->path);
511 return -1;
516 switch (sc->code) {
517 char *semi_colon;
519 case NEBTYPE_EXTERNALCOMMAND_END:
520 semi_colon = strchr(ptr, ';');
521 if (!semi_colon)
522 return 0;
523 if (!(sc = get_command_type(ptr, semi_colon - ptr))) {
524 return 0;
527 nvecs = vectorize_string(semi_colon + 1, sc->nvecs);
528 if (nvecs != sc->nvecs) {
529 warn("nvecs discrepancy: %d vs %d (%s)\n", nvecs, sc->nvecs, ptr);
531 break;
533 case NEBTYPE_HOSTCHECK_PROCESSED:
534 case NEBTYPE_SERVICECHECK_PROCESSED:
535 puts(line);
537 case NEBTYPE_DOWNTIME_LOAD + CONCERNS_HOST:
538 case NEBTYPE_DOWNTIME_LOAD + CONCERNS_SERVICE:
539 break;
541 case IGNORE_LINE:
542 return 0;
545 return 0;
549 * Returns an increasing numeric value for a nagios logfile
550 * For a file with a name such as:
551 * nagios-12-01-2002-00.log
552 * it will return
553 * 2002120100
555 #define NUM_PARTS 4
556 static size_t path_cmp_number(char *path)
558 size_t ret, len = strlen(path);
559 char *dash = NULL;
560 int i;
561 unsigned long part[NUM_PARTS];
563 if (len < 18 || strcmp(&path[len - 4], ".log"))
564 return 0;
565 dash = strrchr(path, '/');
566 if (!dash)
567 dash = path;
569 dash++;
571 * we special-case nagios.log as always being the
572 * last file to be parsed. It has to be, since it's
573 * the currently active logfile
575 if (!strcmp(dash, "nagios.log") || num_nfile == 1)
576 return ~0;
578 for (i = 0; i < NUM_PARTS; i++) {
579 char *endp;
581 dash = strchr(dash, '-');
582 if (!dash)
583 crash("dash is not");
585 dash++;
586 part[i] = strtoul(dash, &endp, 10);
587 if (!part[i] && dash == endp)
588 return 0;
589 if (!endp)
590 return 0;
591 dash = endp;
593 if (part[0] < 1 || part[0] > 12)
594 return 0;
595 if (part[1] < 1 || part[1] > 31)
596 return 0;
597 if (part[2] < 2000 || part[2] > 2008)
598 return 0;
599 ret = part[2] * 1000000;
600 ret += part[0] * 10000;
601 ret += part[1] * 100;
602 ret += part[3];
604 return ret;
607 #define min(a, b) ((a) < (b) ? (a) : (b))
608 static void first_log_time(struct naglog_file *nf)
610 int fd, i = 0;
611 char buf[1024];
612 struct stat st;
614 if (!(fd = open(nf->path, O_RDONLY)))
615 crash("Failed to open %s: %s", nf->path, strerror(errno));
618 * since we're looking at every file in here anyway,
619 * we also determine the size of them so we can do an
620 * arena allocation large enough to fit the largest
621 * file + an added newline later
623 if (fstat(fd, &st) < 0)
624 crash("Failed to stat %s: %s", nf->path, strerror(errno));
626 nf->size = st.st_size;
628 if (read(fd, buf, sizeof(buf)) < min(sizeof(buf), st.st_size))
629 crash("Incomplete read of %s", nf->path);
631 buf[sizeof(buf) - 1] = 0;
632 /* skip empty lines at top of file */
633 while (i < sizeof(buf) - 12 && (buf[i] == '\n' || buf[i] == '\r'))
634 i++;
636 if (strtotimet(buf + i + 1, &nf->first))
637 crash("'%s' has no timestamp for us to parse", buf);
639 nf->cmp = path_cmp_number(nf->path);
640 close(fd);
643 int nfile_cmp(const void *p1, const void *p2)
645 const struct naglog_file *a = p1;
646 const struct naglog_file *b = p2;
648 if (a->first > b->first)
649 return 1;
650 if (b->first > a->first)
651 return -1;
653 if (a->cmp > b->cmp)
654 return 1;
655 if (b->cmp > a->cmp)
656 return -1;
658 crash("Two files with same 'first' and 'cmp'? Bizarre...");
660 return 0;
665 * hashes one line from an "interesting"-file. We use (void *)1
666 * to mark this as "present in hash-table" as we have no real
667 * data to lookup but still want hash_find{,2} to return non-NULL
668 * when it finds a match
670 static int hash_one_line(char *line, size_t len)
672 char *p;
674 p = strchr(line, ';');
675 if (p) {
676 *p++ = 0;
677 if (!interesting_services)
678 interesting_services = hash_init(16384);
679 hash_add2(interesting_services, line, p, (void *)1);
681 else {
682 if (!interesting_hosts)
683 interesting_hosts = hash_init(16384);
685 hash_add(interesting_hosts, line, (void *)1);
688 return 0;
691 static int hash_interesting(const char *path)
693 struct stat st;
695 if (stat(path, &st) < 0)
696 crash("failed to stat %s: %s", path, strerror(errno));
698 lparse_path(path, st.st_size, hash_one_line);
700 return 0;
703 extern const char *__progname;
704 int main(int argc, char **argv)
706 int i;
707 struct naglog_file *nfile;
709 strv = calloc(sizeof(char *), MAX_NVECS);
710 nfile = calloc(sizeof(*nfile), argc - 1);
711 if (!strv || !nfile)
712 crash("Failed to alloc initial structs");
715 for (num_nfile = 0,i = 1; i < argc; i++) {
716 char *opt, *arg = argv[i];
717 struct naglog_file *nf;
718 int eq_opt = 0;
720 if ((opt = strchr(arg, '='))) {
721 *opt++ = '\0';
722 eq_opt = 1;
724 else if (i < argc - 1) {
725 opt = argv[i + 1];
728 if (!prefixcmp(arg, "--ignore-process-events")) {
729 ignore_process_events = 1;
730 continue;
732 if (!prefixcmp(arg, "--debug") || !prefixcmp(arg, "-d")) {
733 debug_level++;
734 continue;
736 if (!prefixcmp(arg, "--interesting") || !prefixcmp(arg, "-i")) {
737 if (!opt || !*opt)
738 crash("%s requires a filename as argument", arg);
739 hash_interesting(opt);
740 if (opt && !eq_opt)
741 i++;
742 continue;
744 if (!prefixcmp(arg, "--first") || !prefixcmp(arg, "--last")) {
745 time_t when;
747 if (!opt || !*opt)
748 crash("%s requires a timestamp as argument", arg);
749 when = strtoul(opt, NULL, 0);
750 if (opt && !eq_opt)
751 i++;
752 if (!prefixcmp(arg, "--first"))
753 first_time = when;
754 else
755 last_time = when;
756 continue;
759 /* non-argument, so treat as file */
760 nf = &nfile[num_nfile++];
761 nf->path = arg;
762 first_log_time(nf);
763 totsize += nf->size;
766 log_grok_var("logfile", "/dev/null");
767 log_grok_var("log_levels", "warn");
769 if (!num_nfile)
770 crash("Usage: %s [--incremental] [--interesting <file>] [--truncate-db] logfiles\n",
771 __progname);
773 if (log_init() < 0)
774 crash("log_init() failed");
776 qsort(nfile, num_nfile, sizeof(*nfile), nfile_cmp);
778 gettimeofday(&import_start, NULL);
780 for (i = 0; i < num_nfile; i++) {
781 struct naglog_file *nf = &nfile[i];
782 if (last_time && nf->first > last_time) {
783 debug("ignoring %s\n", nf->path);
784 continue;
786 if (first_time && i < num_nfile - 1 && nfile[i + 1].first < first_time) {
787 debug("ignoring %s\n", nf->path);
788 continue;
791 cur_file = nf;
792 debug("importing from %s (%lu : %u)\n", nf->path, nf->first, nf->cmp);
793 line_no = 0;
794 lparse_path(nf->path, nf->size, parse_line);
795 totlines += line_no;
796 imported++; /* make up for one lost byte per file */
799 if (warnings && debug_level)
800 fprintf(stderr, "Total warnings: %d\n", warnings);
802 print_unhandled_events();
804 return 0;