db_updater: Put parentheses back
[merlin.git] / import.c
blob3b1f1cbff63480b81a7feda2ad6cf907d9b70d07
1 #define _GNU_SOURCE 1
2 #include <sys/types.h>
3 #include <signal.h>
5 #include <nagios/broker.h>
6 #include <nagios/nebcallbacks.h>
7 #include "shared.h"
8 #include "sql.h"
9 #include "state.h"
10 #include "lparse.h"
11 #include "logutils.h"
12 #include "cfgfile.h"
13 #include <stdint.h> /* standard fixed-size integer types. */
14 #include <inttypes.h> /* PRIxxx printf specifiers. */
/* parse_line() returns early for events whose code equals this value */
#define IGNORE_LINE         0

/*
 * Added to a NEBTYPE_* value in event_codes[] so that the host and
 * service variants of the same event get distinct codes.
 */
#define CONCERNS_HOST       50
#define CONCERNS_SERVICE    60

/* NOTE(review): neither of these is referenced in the visible part of this file */
#define MAX_NVECS           16
#define HASH_TABLE_SIZE     128

/* for some reason these aren't defined inside Nagios' headers */
#define SERVICE_OK          0
#define SERVICE_WARNING     1
#define SERVICE_CRITICAL    2
#define SERVICE_UNKNOWN     3

#define PROGRESS_INTERVAL   25000 /* lines to parse between progress updates */
32 static const char *progname;
33 static char *db_table;
34 static int only_notifications;
35 static unsigned long long imported, totsize, totlines, skipped;
36 static int lines_since_progress, do_progress, list_files;
37 static struct timeval import_start;
38 static time_t daemon_start, daemon_stop, incremental;
39 static int daemon_is_running;
40 static uint max_dt_depth, skipped_files;
41 static int repair_table;
43 static time_t next_dt_purge; /* when next to purge expired downtime */
44 #define DT_PURGE_GRACETIME 300 /* seconds to add to next_dt_purge */
46 static time_t ltime; /* the timestamp from the current log-line */
48 static uint dt_start, dt_stop, dt_skip;
49 #define dt_depth (dt_start - dt_stop)
50 static dkhash_table *host_downtime;
51 static dkhash_table *service_downtime;
52 static int downtime_id;
/*
 * One downtime: either a scheduling command stashed in the dentry[]
 * chains, or a downtime currently in effect that lives in the
 * host_downtime/service_downtime tables.
 */
struct downtime_entry {
	int id;                       /* id handed out when the start event is seen */
	int code;                     /* code of the external command that scheduled it */
	char *host;                   /* heap copy of the host name, if any */
	char *service;                /* heap copy of the service name; NULL for host downtime */
	time_t start;                 /* scheduled start */
	time_t stop;                  /* scheduled stop */
	int fixed;                    /* "fixed" field of the scheduling command */
	time_t duration;              /* seconds */
	time_t started;               /* log timestamp of the start event */
	time_t ended;
	int purged;                   /* set by remove_downtime() */
	int trigger;                  /* non-zero when the command named a triggering downtime */
	int slot;                     /* index of the dentry[] chain holding this entry */
	struct downtime_entry *next;  /* next entry in the same chain */
};
/* number of chains in dentry[]; commands are filed under start % NUM_DENTRIES */
#define NUM_DENTRIES 1024

/* chains of stashed downtime commands (allocated outside this part of the file) */
static struct downtime_entry **dentry;

/* log timestamp of the most recent downtime start event */
static time_t last_downtime_start;
75 static struct string_code event_codes[] = {
76 add_ignored("Error"),
77 add_ignored("Warning"),
78 add_ignored("LOG ROTATION"),
79 add_ignored("HOST FLAPPING ALERT"),
80 add_ignored("SERVICE FLAPPING ALERT"),
81 add_ignored("SERVICE EVENT HANDLER"),
82 add_ignored("HOST EVENT HANDLER"),
83 add_ignored("LOG VERSION"),
84 add_ignored("livestatus"),
85 add_ignored("TIMEPERIOD TRANSITION"),
86 add_ignored("wproc"),
87 add_ignored("qh"),
88 add_ignored("nerd"),
90 add_code(5, "HOST NOTIFICATION", NEBTYPE_NOTIFICATION_END + CONCERNS_HOST),
91 add_code(6, "SERVICE NOTIFICATION", NEBTYPE_NOTIFICATION_END + CONCERNS_SERVICE),
92 add_code(3, "PASSIVE HOST CHECK", NEBTYPE_HOSTCHECK_PROCESSED),
93 add_code(4, "PASSIVE SERVICE CHECK", NEBTYPE_SERVICECHECK_PROCESSED),
94 add_code(0, "EXTERNAL COMMAND", NEBTYPE_EXTERNALCOMMAND_END),
95 add_code(5, "HOST ALERT", NEBTYPE_HOSTCHECK_PROCESSED),
96 add_code(5, "INITIAL HOST STATE", NEBTYPE_HOSTCHECK_PROCESSED),
97 add_code(5, "CURRENT HOST STATE", NEBTYPE_HOSTCHECK_PROCESSED),
98 add_code(6, "SERVICE ALERT", NEBTYPE_SERVICECHECK_PROCESSED),
99 add_code(6, "INITIAL SERVICE STATE", NEBTYPE_SERVICECHECK_PROCESSED),
100 add_code(6, "CURRENT SERVICE STATE", NEBTYPE_SERVICECHECK_PROCESSED),
101 add_code(3, "HOST DOWNTIME ALERT", NEBTYPE_DOWNTIME_LOAD + CONCERNS_HOST),
102 add_code(4, "SERVICE DOWNTIME ALERT", NEBTYPE_DOWNTIME_LOAD + CONCERNS_SERVICE),
103 { 0, NULL, 0, 0 },
106 static struct string_code command_codes[] = {
107 add_cdef(1, DEL_HOST_DOWNTIME),
108 add_cdef(1, DEL_SVC_DOWNTIME),
109 add_cdef(8, SCHEDULE_AND_PROPAGATE_HOST_DOWNTIME),
110 add_cdef(8, SCHEDULE_AND_PROPAGATE_TRIGGERED_HOST_DOWNTIME),
111 add_cdef(8, SCHEDULE_HOSTGROUP_HOST_DOWNTIME),
112 add_cdef(8, SCHEDULE_HOSTGROUP_SVC_DOWNTIME),
113 add_cdef(8, SCHEDULE_HOST_DOWNTIME),
114 add_cdef(8, SCHEDULE_HOST_SVC_DOWNTIME),
115 add_cdef(8, SCHEDULE_SERVICEGROUP_HOST_DOWNTIME),
116 add_cdef(8, SCHEDULE_SERVICEGROUP_SVC_DOWNTIME),
117 add_cdef(8, SCHEDULE_SVC_DOWNTIME),
120 * These really have one more field than listed here. We omit one
121 * to make author and comment concatenated with a semi-colon by default.
123 add_cdef(6, ACKNOWLEDGE_SVC_PROBLEM),
124 add_cdef(5, ACKNOWLEDGE_HOST_PROBLEM),
125 { 0, NULL, 0, 0 },
129 static void handle_sql_result(int errors, const char *table)
131 if (!errors || !sql_table_crashed)
132 return;
134 if (repair_table) {
135 printf("Repairing table '%s'. This may take a very long time. Please be patient\n", table);
136 sql_repair_table(table);
138 else {
139 crash("Database table '%s' appears to have crashed. Please run\n mysqlrepair %s.%s",
140 table, sql_db_name(), table);
144 static int insert_host_result(nebstruct_host_check_data *ds)
146 int result;
147 char *host_name = NULL, *output = NULL;
149 if (!host_has_new_state(ds->host_name, ds->state, ds->state_type)) {
150 linfo("state not changed for host '%s'", ds->host_name);
151 return 0;
154 sql_quote(ds->host_name, &host_name);
155 sql_quote(ds->output, &output);
156 result = sql_query
157 ("INSERT INTO %s("
158 "timestamp, event_type, host_name, state, "
159 "hard, retry, output"
160 ") VALUES(%lu, %d, %s, %d, %d, %d, %s)",
161 db_table,
162 ds->timestamp.tv_sec, ds->type, host_name, ds->state,
163 ds->state_type == HARD_STATE, ds->current_attempt,
164 output);
166 free(host_name);
167 free(output);
169 return result;
172 static int insert_service_result(nebstruct_service_check_data *ds)
174 int result;
175 char *host_name, *service_description, *output;
177 if (!service_has_new_state(ds->host_name, ds->service_description, ds->state, ds->state_type)) {
178 linfo("state not changed for service '%s' on host '%s'",
179 ds->service_description, ds->host_name);
180 return 0;
183 sql_quote(ds->host_name, &host_name);
184 sql_quote(ds->service_description, &service_description);
185 sql_quote(ds->output, &output);
186 result = sql_query
187 ("INSERT INTO %s ("
188 "timestamp, event_type, host_name, service_description, state, "
189 "hard, retry, output) "
190 "VALUES(%lu, %d, %s, %s, '%d', '%d', '%d', %s)",
191 db_table,
192 ds->timestamp.tv_sec, ds->type, host_name,
193 service_description, ds->state,
194 ds->state_type == HARD_STATE, ds->current_attempt,
195 output);
196 free(host_name);
197 free(service_description);
198 free(output);
199 return result;
202 static int sql_insert_downtime(nebstruct_downtime_data *ds)
204 int depth = 0, result;
205 char *host_name, *service_description;
207 switch (ds->type) {
208 case NEBTYPE_DOWNTIME_START:
210 * If downtime is starting, it will always be at least
211 * 1 deep. Since the report UI doesn't care about the
212 * actual depth but only whether downtime is in effect
213 * or not we can get away with cheating here.
215 depth = 1;
216 case NEBTYPE_DOWNTIME_STOP:
217 break;
218 case NEBTYPE_DOWNTIME_DELETE:
220 * if we're deleting a downtime that hasn't started yet, nothing
221 * should be added to the database. Otherwise, transform it to a
222 * NEBTYPE_DOWNTIME_STOP event to mark the downtime as stopped.
224 if (ds->start_time > time(NULL))
225 return 0;
226 ds->type = NEBTYPE_DOWNTIME_STOP;
227 break;
228 default:
229 return 0;
232 sql_quote(ds->host_name, &host_name);
233 if (ds->service_description) {
234 sql_quote(ds->service_description, &service_description);
236 result = sql_query
237 ("INSERT INTO %s("
238 "timestamp, event_type, host_name,"
239 "service_description, downtime_depth) "
240 "VALUES(%lu, %d, %s, %s, %d)",
241 db_table,
242 ds->timestamp.tv_sec, ds->type, host_name,
243 service_description, depth);
244 free(service_description);
245 } else {
246 result = sql_query
247 ("INSERT INTO %s("
248 "timestamp, event_type, host_name, downtime_depth)"
249 "VALUES(%lu, %d, %s, %d)",
250 db_table,
251 ds->timestamp.tv_sec, ds->type, host_name, depth);
253 free(host_name);
254 return result;
257 static int insert_process_data(nebstruct_process_data *ds)
259 switch(ds->type) {
260 case NEBTYPE_PROCESS_START:
261 case NEBTYPE_PROCESS_SHUTDOWN:
262 break;
263 case NEBTYPE_PROCESS_RESTART:
264 ds->type = NEBTYPE_PROCESS_SHUTDOWN;
265 break;
266 default:
267 return 0;
270 return sql_query
271 ("INSERT INTO %s(timestamp, event_type) "
272 "VALUES(%lu, %d)",
273 db_table, ds->timestamp.tv_sec, ds->type);
/* Debugging aid: print the first 'n' strings of 'v', one per line, with their index. */
static inline void print_strvec(char **v, int n)
{
	int idx = 0;

	while (idx < n) {
		printf("v[%2d]: %s\n", idx, v[idx]);
		idx++;
	}
}
285 static void show_progress(void)
287 time_t eta, elapsed;
288 float pct_done, real_pct_done;
290 totlines += lines_since_progress;
291 lines_since_progress = 0;
293 if (!do_progress)
294 return;
296 elapsed = time(NULL) - import_start.tv_sec;
297 if (!elapsed)
298 elapsed = 1;
300 real_pct_done = (float)imported / (float)(totsize - skipped) * 100;
301 pct_done = ((float)(imported + skipped) / (float)totsize) * 100;
302 eta = (elapsed / real_pct_done) * (100.0 - real_pct_done);
304 printf("Importing data: %.2f%% (%s) done ",
305 pct_done, human_bytes(imported + skipped));
306 if (elapsed > 10) {
307 printf("ETA: ");
308 if (eta > 60)
309 printf("%lum%lus", eta / 60, eta % 60);
310 else
311 printf("%lus", eta);
313 printf(" \r");
314 fflush(stdout);
317 static void end_progress(void)
319 struct timeval tv;
321 if (list_files)
322 return;
324 gettimeofday(&tv, NULL);
327 * If any of the logfiles doesn't have a newline
328 * at end of file, imported will be slightly off.
329 * We set it hard here so as to make sure that
330 * the final progress output stops at exactly 100%
332 imported = totsize - skipped;
334 show_progress();
335 putchar('\n');
336 printf("%s, %llu lines imported in %s.",
337 human_bytes(totsize), totlines, tv_delta(&import_start, &tv));
338 if (skipped)
339 printf(" %s in %u files skipped.", human_bytes(skipped), skipped_files);
340 putchar('\n');
343 static int indexes_disabled;
344 static void disable_indexes(void)
346 if (indexes_disabled)
347 return;
350 * if we're more than 95% done before inserting anything,
351 * such as might be the case when running an incremental
352 * import, we might as well not bother with disabling
353 * the indexes, since enabling them again can take quite
354 * a long time
356 if (((float)(skipped + imported) / (float)totsize) * 100 >= 95.0)
357 return;
360 * We lock the table we'll be working with and disable
361 * indexes on it. Otherwise doing the actual inserts
362 * will take just about forever, as MySQL has to update
363 * and flush the index cache between each operation.
365 if (sql_query("ALTER TABLE %s DISABLE KEYS", db_table))
366 crash("Failed to disable keys: %s", sql_error_msg());
367 if (sql_query("LOCK TABLES %s WRITE, report_data_extras WRITE", db_table))
368 crash("Failed to lock table %s: %s", db_table, sql_error_msg());
370 indexes_disabled = 1;
373 static void insert_extras(void)
375 sql_query("INSERT INTO %s (`timestamp`, `event_type`, `flags`, `attrib`, `host_name`, `service_description`, `state`, `hard`, `retry`, `downtime_depth`, `output`) SELECT `timestamp`, `event_type`, `flags`, `attrib`, `host_name`, `service_description`, `state`, `hard`, `retry`, `downtime_depth`, `output` FROM report_data_extras;", db_table);
378 static void enable_indexes(void)
380 db_wrap_result *result = NULL;
381 int64_t entries;
382 time_t start;
384 /* if we haven't disabled the indexes we can quit early */
385 if (!indexes_disabled)
386 return;
388 sql_query("SELECT count(1) FROM %s", db_table);
389 if (!(result = sql_get_result()))
390 entries = 0;
391 else {
392 if (0 == result->api->step(result)) {
393 result->api->get_int64_ndx(result, 0, &entries);
394 } else {
395 entries = 0;
397 sql_free_result();
400 signal(SIGINT, SIG_IGN);
401 sql_query("UNLOCK TABLES");
402 start = time(NULL);
403 printf("Creating sql table indexes. This will likely take ~%"PRIi64" seconds\n",
404 (entries / 50000) + 1);
405 sql_query("ALTER TABLE %s ENABLE KEYS", db_table);
406 printf("%lu database entries indexed in %lu seconds\n",
407 entries, time(NULL) - start);
410 static int insert_downtime_event(int type, char *host, char *service, int id)
412 nebstruct_downtime_data ds;
413 int result;
415 if (!is_interesting_service(host, service))
416 return 0;
418 dt_start += type == NEBTYPE_DOWNTIME_START;
419 dt_stop += type == NEBTYPE_DOWNTIME_STOP;
420 if (dt_depth > max_dt_depth)
421 max_dt_depth = dt_depth;
423 if (!use_database || only_notifications)
424 return 0;
426 memset(&ds, 0, sizeof(ds));
428 ds.type = type;
429 ds.timestamp.tv_sec = ltime;
430 ds.host_name = host;
431 ds.service_description = service;
432 ds.downtime_id = id;
434 disable_indexes();
435 result = sql_insert_downtime(&ds);
436 if (result < 0)
437 lp_crash("Failed to insert downtime:\n type=%d, host=%s, service=%s, id=%d",
438 type, host, service, id);
440 return result;
/* What parse_import_notification() extracts from a notification log line. */
typedef struct import_notification {
	int type;    /* HOST_NOTIFICATION or SERVICE_NOTIFICATION */
	int reason;  /* as returned by parse_notification_reason() */
	int state;   /* parsed host or service state */
} import_notification;
447 static int parse_import_notification(char *str, import_notification *n)
449 char *state_str = str;
451 n->reason = parse_notification_reason(str);
452 if (n->reason != NOTIFICATION_NORMAL) {
453 char *space, *paren;
455 space = strchr(str, ' ');
456 if (!space)
457 return -1;
458 paren = strchr(space, ')');
459 if (!paren)
460 return -1;
461 *paren = '\0';
463 state_str = space + 2;
466 n->type = SERVICE_NOTIFICATION;
467 n->state = parse_service_state_gently(state_str);
468 if (n->state < 0) {
469 n->type = HOST_NOTIFICATION;
470 n->state = parse_host_state_gently(state_str);
473 return 0;
476 static int insert_notification(struct string_code *sc)
478 int base_idx, result;
479 char *contact_name, *host_name, *service_description;
480 char *command_name, *output;
481 struct import_notification n;
483 if (!only_notifications)
484 return 0;
486 if (sc->code - NEBTYPE_NOTIFICATION_END == CONCERNS_SERVICE) {
487 base_idx = 1;
488 } else {
489 base_idx = 0;
491 if (parse_import_notification(strv[base_idx + 2], &n) < 0) {
492 handle_unknown_event(strv[base_idx + 2]);
493 return 0;
496 if (!use_database)
497 return 0;
499 disable_indexes();
500 sql_quote(strv[0], &contact_name);
501 sql_quote(strv[1], &host_name);
502 if (base_idx) {
503 sql_quote(strv[2], &service_description);
504 } else {
505 service_description = NULL;
507 sql_quote(strv[base_idx + 3], &command_name);
508 sql_quote(strv[base_idx + 4], &output);
509 result = sql_query
510 ("INSERT INTO %s("
511 "notification_type, start_time, end_time, contact_name, "
512 "host_name, service_description, "
513 "command_name, output, "
514 "state, reason_type) "
515 "VALUES("
516 "%d, %lu, %lu, %s, "
517 "%s, %s, "
518 "%s, %s, "
519 "%d, %d)",
520 db_table,
521 n.type, ltime, ltime, contact_name,
522 host_name, safe_str(service_description),
523 command_name, output,
524 n.state, n.reason);
525 free(contact_name);
526 free(host_name);
527 safe_free(service_description);
528 free(command_name);
529 free(output);
530 return result;
533 static int insert_service_check(struct string_code *sc)
535 nebstruct_service_check_data ds;
537 if (!is_interesting_service(strv[0], strv[1]))
538 return 0;
540 memset(&ds, 0, sizeof(ds));
542 ds.timestamp.tv_sec = ltime;
543 ds.type = sc->code;
544 ds.host_name = strv[0];
545 ds.service_description = strv[1];
546 if (sc->nvecs == 4) {
547 /* passive service check result */
548 if (*strv[2] >= '0' && *strv[2] <= '9')
549 ds.state = atoi(strv[2]);
550 else
551 ds.state = parse_service_state(strv[2]);
552 ds.state_type = HARD_STATE;
553 ds.current_attempt = 1;
554 ds.output = strv[3];
555 } else {
556 ds.state = parse_service_state(strv[2]);
557 ds.state_type = soft_hard(strv[3]);
558 ds.current_attempt = atoi(strv[4]);
559 ds.output = strv[5];
562 if (!use_database || only_notifications)
563 return 0;
565 disable_indexes();
566 return insert_service_result(&ds);
569 static int insert_host_check(struct string_code *sc)
571 nebstruct_host_check_data ds;
573 if (!is_interesting_host(strv[0]))
574 return 0;
576 memset(&ds, 0, sizeof(ds));
578 ds.timestamp.tv_sec = ltime;
579 ds.type = sc->code;
580 ds.host_name = strv[0];
581 if (sc->nvecs == 3) {
582 if (*strv[1] >= '0' && *strv[1] <= '9')
583 ds.state = atoi(strv[1]);
584 else
585 ds.state = parse_host_state(strv[1]);
586 /* passive host check result */
587 ds.output = strv[2];
588 ds.current_attempt = 1;
589 ds.state_type = HARD_STATE;
590 } else {
591 ds.state = parse_host_state(strv[1]);
592 ds.state_type = soft_hard(strv[2]);
593 ds.current_attempt = atoi(strv[3]);
594 ds.output = strv[4];
597 if (!use_database || only_notifications)
598 return 0;
600 disable_indexes();
601 return insert_host_result(&ds);
604 static int insert_process_event(int type)
606 nebstruct_process_data ds;
608 if (!use_database || only_notifications)
609 return 0;
611 memset(&ds, 0, sizeof(ds));
612 ds.timestamp.tv_sec = ltime;
613 ds.type = type;
614 disable_indexes();
615 return insert_process_data(&ds);
/*
 * Acknowledgements are not imported. The function below is compiled
 * out and every use of insert_acknowledgement() expands to an empty
 * statement instead.
 */
#if 0
static int insert_acknowledgement(struct string_code *sc)
{
	return 0;
}
#else
# define insert_acknowledgement(foo) /* nothing */ ;
#endif
627 static void dt_print(char *tpc, time_t when, struct downtime_entry *dt)
629 if (!debug_level)
630 return;
632 printf("%s: time=%lu started=%lu start=%lu stop=%lu duration=%lu id=%d ",
633 tpc, when, dt->started, dt->start, dt->stop, dt->duration, dt->id);
634 printf("%s", dt->host);
635 if (dt->service)
636 printf(";%s", dt->service);
637 putchar('\n');
640 static struct downtime_entry *last_dte;
641 static struct downtime_entry *del_dte;
643 static void remove_downtime(struct downtime_entry *dt);
644 static int del_matching_dt(void *data)
646 struct downtime_entry *dt = data;
648 if (del_dte->id == dt->id) {
649 dt_print("ALSO", 0, dt);
650 remove_downtime(dt);
651 return DKHASH_WALK_REMOVE;
654 return 0;
657 static void stash_downtime_command(struct downtime_entry *dt)
659 dt->slot = dt->start % NUM_DENTRIES;
660 dt->next = dentry[dt->slot];
661 dentry[dt->slot] = dt;
664 static void remove_downtime(struct downtime_entry *dt)
666 if (!is_interesting_service(dt->host, dt->service))
667 return;
669 insert_downtime_event(NEBTYPE_DOWNTIME_STOP, dt->host, dt->service, dt->id);
671 dt_print("RM_DT", ltime, dt);
672 dt->purged = 1;
675 static struct downtime_entry *
676 dt_matches_command(struct downtime_entry *dt, char *host, char *service)
678 for (; dt; dt = dt->next) {
679 time_t diff;
681 if (ltime > dt->stop || ltime < dt->start) {
682 continue;
685 switch (dt->code) {
686 case SCHEDULE_SVC_DOWNTIME:
687 if (service && strcmp(service, dt->service))
688 continue;
690 /* fallthrough */
691 case SCHEDULE_HOST_DOWNTIME:
692 case SCHEDULE_HOST_SVC_DOWNTIME:
693 if (strcmp(host, dt->host)) {
694 continue;
697 case SCHEDULE_AND_PROPAGATE_HOST_DOWNTIME:
698 case SCHEDULE_AND_PROPAGATE_TRIGGERED_HOST_DOWNTIME:
699 /* these two have host set in dt, but
700 * it will not match all the possible hosts */
702 /* fallthrough */
703 case SCHEDULE_HOSTGROUP_HOST_DOWNTIME:
704 case SCHEDULE_HOSTGROUP_SVC_DOWNTIME:
705 case SCHEDULE_SERVICEGROUP_HOST_DOWNTIME:
706 case SCHEDULE_SERVICEGROUP_SVC_DOWNTIME:
707 break;
708 default:
709 lp_crash("dt->code not set properly\n");
713 * Once we get here all the various other criteria have
714 * been matched, so we need to check if the daemon was
715 * running when this downtime was supposed to have
716 * started, and otherwise use the daemon start time
717 * as the value to diff against
719 if (daemon_stop < dt->start && daemon_start > dt->start) {
720 debug("Adjusting dt->start (%lu) to (%lu)\n",
721 dt->start, daemon_start);
722 dt->start = daemon_start;
723 if (dt->trigger && dt->duration)
724 dt->stop = dt->start + dt->duration;
727 diff = ltime - dt->start;
728 if (diff < 3 || dt->trigger || !dt->fixed)
729 return dt;
732 return NULL;
735 static struct downtime_entry *
736 find_downtime_command(char *host, char *service)
738 int i;
739 struct downtime_entry *shortcut = NULL;
741 if (last_dte && last_dte->start == ltime) {
742 shortcut = last_dte;
743 // return last_dte;
745 for (i = 0; i < NUM_DENTRIES; i++) {
746 struct downtime_entry *dt;
747 dt = dt_matches_command(dentry[i], host, service);
748 if (dt) {
749 if (shortcut && dt != shortcut)
750 if (debug_level)
751 printf("FIND shortcut no good\n");
752 last_dte = dt;
753 return dt;
757 debug("FIND not\n");
758 return NULL;
761 static int print_downtime(void *data)
763 struct downtime_entry *dt = (struct downtime_entry *)data;
765 dt_print("UNCLOSED", ltime, dt);
767 return 0;
770 static inline void set_next_dt_purge(time_t base, time_t add)
772 if (!next_dt_purge || next_dt_purge > base + add)
773 next_dt_purge = base + add;
775 if (next_dt_purge <= ltime)
776 next_dt_purge = ltime + 1;
779 static inline void mrln_add_downtime(char *host, char *service, int id)
781 struct downtime_entry *dt, *cmd, *old;
782 dkhash_table *the_table;
784 if (!is_interesting_service(host, service))
785 return;
787 dt = malloc(sizeof(*dt));
788 cmd = find_downtime_command(host, service);
789 if (!cmd) {
790 warn("DT with no ext cmd? %lu %s;%s", ltime, host, service);
791 memset(dt, 0, sizeof(*dt));
792 dt->duration = 7200; /* the default downtime duration in nagios */
793 dt->start = ltime;
794 dt->stop = dt->start + dt->duration;
796 else
797 memcpy(dt, cmd, sizeof(*dt));
799 dt->host = strdup(host);
800 dt->id = id;
801 dt->started = ltime;
803 set_next_dt_purge(ltime, dt->duration);
805 if (service) {
806 dt->service = strdup(service);
807 the_table = service_downtime;
809 else {
810 dt->service = NULL;
811 the_table = host_downtime;
814 old = dkhash_get(the_table, dt->host, dt->service);
815 if (old) {
816 dkhash_remove(the_table, old->host, old->service);
817 free(old->host);
818 if (old->service)
819 free(old->service);
820 free(old);
822 dkhash_insert(the_table, dt->host, dt->service, dt);
824 dt_print("IN_DT", ltime, dt);
825 insert_downtime_event(NEBTYPE_DOWNTIME_START, dt->host, dt->service, dt->id);
828 static time_t last_host_dt_del, last_svc_dt_del;
829 static int register_downtime_command(struct string_code *sc, int nvecs)
831 struct downtime_entry *dt;
832 char *start_time, *end_time, *duration = NULL;
833 char *host = NULL, *service = NULL, *fixed, *triggered_by = NULL;
834 time_t foo;
837 * this could cause crashes if we let it go on, so
838 * bail early if we didn't parse enough fields from
839 * the file.
841 if (nvecs < sc->nvecs) {
842 return -1;
845 switch (sc->code) {
846 case DEL_HOST_DOWNTIME:
847 last_host_dt_del = ltime;
848 return 0;
849 case DEL_SVC_DOWNTIME:
850 last_svc_dt_del = ltime;
851 return 0;
853 case SCHEDULE_HOST_DOWNTIME:
854 if (strtotimet(strv[5], &foo))
855 duration = strv[4];
856 /* fallthrough */
857 case SCHEDULE_AND_PROPAGATE_HOST_DOWNTIME:
858 case SCHEDULE_AND_PROPAGATE_TRIGGERED_HOST_DOWNTIME:
859 case SCHEDULE_HOST_SVC_DOWNTIME:
860 host = strv[0];
861 /* fallthrough */
862 case SCHEDULE_HOSTGROUP_HOST_DOWNTIME:
863 case SCHEDULE_HOSTGROUP_SVC_DOWNTIME:
864 case SCHEDULE_SERVICEGROUP_HOST_DOWNTIME:
865 case SCHEDULE_SERVICEGROUP_SVC_DOWNTIME:
866 start_time = strv[1];
867 end_time = strv[2];
868 fixed = strv[3];
869 if (strtotimet(strv[5], &foo))
870 triggered_by = strv[4];
871 if (!duration)
872 duration = strv[5];
874 break;
876 case SCHEDULE_SVC_DOWNTIME:
877 host = strv[0];
878 service = strv[1];
879 start_time = strv[2];
880 end_time = strv[3];
881 fixed = strv[4];
882 if (strtotimet(strv[6], &foo)) {
883 triggered_by = strv[5];
884 duration = strv[6];
886 else {
887 duration = strv[5];
889 break;
891 default:
892 lp_crash("Unknown downtime type: %d", sc->code);
895 if (!(dt = calloc(sizeof(*dt), 1)))
896 lp_crash("calloc(%u, 1) failed: %s", (uint)sizeof(*dt), strerror(errno));
898 dt->code = sc->code;
899 if (host)
900 dt->host = strdup(host);
901 if (service)
902 dt->service = strdup(service);
904 dt->trigger = triggered_by ? !!(*triggered_by - '0') : 0;
905 dt->start = dt->stop = 0;
906 strtotimet(start_time, &dt->start);
907 strtotimet(end_time, &dt->stop);
910 * if neither of these is set, we can't use this command,
911 * so log it as an unknown event and move on. We really
912 * shouldn't crash here no matter what anyways.
914 if (!dt->start && !dt->stop) {
915 devectorize_string(strv, nvecs);
916 warn("No dt->start or dt->stop in: %s", strv[0]);
917 return -1;
921 * sometimes downtime commands can be logged according to
922 * log version 1, while the log still claims to be version 2.
923 * Apparently, this happens when using a daemon supporting
924 * version 2 logging but a downtime command is added that
925 * follows the version 1 standard.
926 * As such, we simply ignore the result of the "duration"
927 * field conversion and just accept that it might not work.
928 * If it doesn't, we force-set it to 7200, since that's what
929 * Nagios uses as a default, and we'll need two of duration,
930 * start_time and end_time in order to make some sense of
931 * this downtime entry
933 if (strtotimet(duration, &dt->duration) < 0)
934 dt->duration = 7200;
935 dt->fixed = *fixed - '0';
938 * we know we have a duration and at least one of stop
939 * and start. Calculate the other if one is missing.
941 if (!dt->stop) {
942 dt->stop = dt->start + dt->duration;
943 } else if (!dt->start) {
944 dt->start = dt->stop - dt->duration;
947 /* make sure we're not starting timeperiod in the past */
948 if (dt->start < ltime) {
949 dt->start = ltime;
950 if (dt->stop <= dt->start)
951 return 0;
953 /* if fixed, we alter duration. Otherwise we alter 'stop' */
954 if (dt->fixed == 1)
955 dt->duration = dt->stop - dt->start;
956 else
957 dt->stop = dt->start + dt->duration;
961 * ignore downtime scheduled to take place in the future.
962 * It will be picked up by the module anyways
964 if (dt->start > time(NULL)) {
965 free(dt);
966 return 0;
969 if (dt->duration > time(NULL)) {
970 warn("Bizarrely large duration (%lu)", dt->duration);
972 if (dt->start < ltime) {
973 if (dt->duration && dt->duration > ltime - dt->start)
974 dt->duration -= ltime - dt->start;
976 dt->start = ltime;
978 if (dt->stop < ltime || dt->stop < dt->start) {
979 /* retroactively scheduled downtime, or just plain wrong */
980 dt->stop = dt->start;
981 dt->duration = 0;
984 if (dt->fixed && dt->duration != dt->stop - dt->start) {
985 // warn("duration doesn't match stop - start: (%lu : %lu)",
986 // dt->duration, dt->stop - dt->start);
988 dt->duration = dt->stop - dt->start;
990 else if (dt->duration > 86400 * 14) {
991 warn("Oddly long duration: %lu", dt->duration);
994 debug("start=%lu; stop=%lu; duration=%lu; fixed=%d; trigger=%d; host=%s service=%s\n",
995 dt->start, dt->stop, dt->duration, dt->fixed, dt->trigger, dt->host, dt->service);
997 stash_downtime_command(dt);
998 return 0;
1001 static int insert_downtime(struct string_code *sc)
1003 int type;
1004 struct downtime_entry *dt = NULL;
1005 int id = 0;
1006 time_t dt_del_cmd;
1007 char *host, *service = NULL;
1009 host = strv[0];
1010 if (sc->nvecs == 4) {
1011 service = strv[1];
1012 dt = dkhash_get(service_downtime, host, service);
1014 else {
1015 dt = dkhash_get(host_downtime, host, NULL);
1021 * to stop a downtime we can either get STOPPED or
1022 * CANCELLED. So far, I've only ever seen STARTED
1023 * for when it actually starts though, and since
1024 * the Nagios daemon is reponsible for launching
1025 * it, it's unlikely there are more variants of
1026 * that string
1028 type = NEBTYPE_DOWNTIME_STOP;
1029 if (!strcmp(strv[sc->nvecs - 2], "STARTED"))
1030 type = NEBTYPE_DOWNTIME_START;
1032 switch (type) {
1033 case NEBTYPE_DOWNTIME_START:
1034 if (ltime - last_downtime_start > 1)
1035 downtime_id++;
1037 id = downtime_id;
1038 mrln_add_downtime(host, service, id);
1039 last_downtime_start = ltime;
1040 break;
1042 case NEBTYPE_DOWNTIME_STOP:
1043 if (!dt) {
1045 * this can happen when overlapping downtime entries
1046 * occur, and the start event for the second (or nth)
1047 * downtime starts before the first downtime has had
1048 * a stop event. It basically means we've almost
1049 * certainly done something wrong.
1051 //printf("no dt. ds.host_name == '%s'\n", ds.host_name);
1052 //fprintf(stderr, "CRASHING: %s;%s\n", ds.host_name, ds.service_description);
1053 //crash("DOWNTIME_STOP without matching DOWNTIME_START");
1054 dt_skip++;
1055 return 0;
1058 dt_del_cmd = !dt->service ? last_host_dt_del : last_svc_dt_del;
1060 if ((ltime - dt_del_cmd) > 1 && dt->duration - (ltime - dt->started) > 60) {
1061 debug("Short dt duration (%lu) for %s;%s (dt->duration=%lu)\n",
1062 ltime - dt->started, dt->host, dt->service, dt->duration);
1064 if (ltime - dt->started > dt->duration + DT_PURGE_GRACETIME)
1065 dt_print("Long", ltime, dt);
1067 remove_downtime(dt);
1069 * Now delete whatever matching downtimes we can find.
1070 * this must be here, or we'll recurse like crazy into
1071 * remove_downtime(), possibly exhausting the stack
1072 * frame buffer
1074 del_dte = dt;
1075 if (!dt->service)
1076 dkhash_walk_data(host_downtime, del_matching_dt);
1077 else
1078 dkhash_walk_data(service_downtime, del_matching_dt);
1079 break;
1081 default:
1082 return -1;
1085 return 0;
1088 static int dt_purged;
1089 static int purge_expired_dt(void *data)
1091 struct downtime_entry *dt = data;
1093 if (dt->purged) {
1094 dt_skip++;
1095 return 0;
1098 set_next_dt_purge(dt->started, dt->duration);
1100 if (ltime + DT_PURGE_GRACETIME > dt->stop) {
1101 dt_purged++;
1102 debug("PURGE %lu: purging expired dt %d (start=%lu; started=%lu; stop=%lu; duration=%lu; host=%s; service=%s",
1103 ltime, dt->id, dt->start, dt->started, dt->stop, dt->duration, dt->host, dt->service);
1104 remove_downtime(dt);
1105 return DKHASH_WALK_REMOVE;
1107 else {
1108 dt_print("PURGED_NOT_TIME", ltime, dt);
1111 return 0;
1114 static int purged_downtimes;
1115 static void purge_expired_downtime(void)
1117 int tot_purged = 0;
1119 next_dt_purge = 0;
1120 dt_purged = 0;
1121 dkhash_walk_data(host_downtime, purge_expired_dt);
1122 if (dt_purged)
1123 debug("PURGE %d host downtimes purged", dt_purged);
1124 tot_purged += dt_purged;
1125 dt_purged = 0;
1126 dkhash_walk_data(service_downtime, purge_expired_dt);
1127 if (dt_purged)
1128 debug("PURGE %d service downtimes purged", dt_purged);
1129 tot_purged += dt_purged;
1130 if (tot_purged)
1131 debug("PURGE total %d entries purged", tot_purged);
1133 if (next_dt_purge)
1134 debug("PURGE next downtime purge supposed to run @ %lu, in %lu seconds",
1135 next_dt_purge, next_dt_purge - ltime);
1137 purged_downtimes += tot_purged;
1140 static inline void handle_start_event(void)
1142 if (!daemon_is_running)
1143 insert_process_event(NEBTYPE_PROCESS_START);
1145 daemon_start = ltime;
1146 daemon_is_running = 1;
1149 static inline void handle_stop_event(void)
1151 if (daemon_is_running) {
1152 insert_process_event(NEBTYPE_PROCESS_SHUTDOWN);
1153 daemon_is_running = 0;
1155 daemon_stop = ltime;
1158 static int parse_line(char *line, uint len)
1160 char *ptr, *colon;
1161 int result = 0, nvecs = 0;
1162 struct string_code *sc;
1163 static time_t last_ltime = 0;
1165 imported += len + 1; /* make up for 1 lost byte per newline */
1166 line_no++;
1168 /* ignore empty lines */
1169 if (!len)
1170 return 0;
1172 if (++lines_since_progress >= PROGRESS_INTERVAL)
1173 show_progress();
1175 /* skip obviously bogus lines */
1176 if (len < 12 || *line != '[') {
1177 warn("line %d; len too short, or line doesn't start with '[' (%s)", line_no, line);
1178 return -1;
1181 ltime = strtoul(line + 1, &ptr, 10);
1182 if (line + 1 == ptr) {
1183 lp_crash("Failed to parse log timestamp from '%s'. I can't handle malformed logdata", line);
1184 return -1;
1187 if (ltime < last_ltime) {
1188 // warn("ltime < last_ltime (%lu < %lu) by %lu. Compensating...",
1189 // ltime, last_ltime, last_ltime - ltime);
1190 ltime = last_ltime;
1192 else
1193 last_ltime = ltime;
1196 * Incremental will be 0 if not set, or 1 if set but
1197 * the database is currently empty.
1198 * Note that this will not always do the correct thing,
1199 * as downtime entries that might have been scheduled for
1200 * purging may never show up as "stopped" in the database
1201 * with this scheme. As such, incremental imports absolutely
1202 * require that nothing is in scheduled downtime when the
1203 * import is running (well, started really, but it amounts
1204 * to the same thing).
1206 if (ltime < incremental)
1207 return 0;
1209 if (next_dt_purge && ltime >= next_dt_purge)
1210 purge_expired_downtime();
1212 while (*ptr == ']' || *ptr == ' ')
1213 ptr++;
1215 if (!is_interesting(ptr))
1216 return 0;
1218 if (!(colon = strchr(ptr, ':'))) {
1219 /* stupid heuristic, but might be good for something,
1220 * somewhere, sometime. if nothing else, it should suppress
1221 * annoying output */
1222 if (is_start_event(ptr)) {
1223 handle_start_event();
1224 return 0;
1226 if (is_stop_event(ptr)) {
1227 handle_stop_event();
1228 return 0;
1232 * An unhandled event. We should probably crash here
1234 handle_unknown_event(line);
1235 return -1;
1238 /* an event happened without us having gotten a start-event */
1239 if (!daemon_is_running) {
1240 insert_process_event(NEBTYPE_PROCESS_START);
1241 daemon_start = ltime;
1242 daemon_is_running = 1;
1245 if (!(sc = get_event_type(ptr, colon - ptr))) {
1246 handle_unknown_event(line);
1247 return -1;
1250 if (sc->code == IGNORE_LINE)
1251 return 0;
1254 * break out early if we know we won't handle this event
1255 * There's no point in parsing a potentially huge amount
1256 * of lines we're not even interested in
1258 switch (sc->code) {
1259 case NEBTYPE_NOTIFICATION_END + CONCERNS_HOST:
1260 case NEBTYPE_NOTIFICATION_END + CONCERNS_SERVICE:
1261 if (only_notifications)
1262 break;
1263 return 0;
1264 default:
1265 if (only_notifications)
1266 return 0;
1267 break;
1270 *colon = 0;
1271 ptr = colon + 1;
1272 while (*ptr == ' ')
1273 ptr++;
1275 if (sc->nvecs) {
1276 int i;
1278 nvecs = vectorize_string(ptr, sc->nvecs);
1280 if (nvecs != sc->nvecs) {
1281 /* broken line */
1282 warn("Line %d in %s seems to not have all the fields it should",
1283 line_no, cur_file->path);
1284 return -1;
1287 for (i = 0; i < sc->nvecs; i++) {
1288 if (!strv[i]) {
1289 /* this should never happen */
1290 warn("Line %d in %s seems to be broken, or we failed to parse it into a vector",
1291 line_no, cur_file->path);
1292 return -1;
1297 switch (sc->code) {
1298 char *semi_colon;
1300 case NEBTYPE_EXTERNALCOMMAND_END:
1301 semi_colon = strchr(ptr, ';');
1302 if (!semi_colon)
1303 return 0;
1304 if (!(sc = get_command_type(ptr, semi_colon - ptr))) {
1305 return 0;
1307 if (sc->code == RESTART_PROGRAM) {
1308 handle_stop_event();
1309 return 0;
1312 nvecs = vectorize_string(semi_colon + 1, sc->nvecs);
1313 if (nvecs != sc->nvecs) {
1314 warn("nvecs discrepancy: %d vs %d (%s)\n", nvecs, sc->nvecs, ptr);
1316 if (sc->code != ACKNOWLEDGE_HOST_PROBLEM &&
1317 sc->code != ACKNOWLEDGE_SVC_PROBLEM)
1319 register_downtime_command(sc, nvecs);
1320 } else {
1321 insert_acknowledgement(sc);
1323 break;
1325 case NEBTYPE_HOSTCHECK_PROCESSED:
1326 result = insert_host_check(sc);
1327 break;
1329 case NEBTYPE_SERVICECHECK_PROCESSED:
1330 result = insert_service_check(sc);
1331 break;
1333 case NEBTYPE_DOWNTIME_LOAD + CONCERNS_HOST:
1334 case NEBTYPE_DOWNTIME_LOAD + CONCERNS_SERVICE:
1335 result = insert_downtime(sc);
1336 break;
1338 case NEBTYPE_NOTIFICATION_END + CONCERNS_HOST:
1339 case NEBTYPE_NOTIFICATION_END + CONCERNS_SERVICE:
1340 result = insert_notification(sc);
1341 break;
1343 case IGNORE_LINE:
1344 return 0;
1347 handle_sql_result(result, db_table);
1348 return 0;
1351 static int parse_one_line(char *str, uint len)
1353 const char *msg;
1355 if (parse_line(str, len) && use_database && sql_error(&msg))
1356 lp_crash("sql error: %s", msg);
1358 return 0;
1361 static int hash_one_line(char *line, uint len)
1363 return add_interesting_object(line);
1366 static int hash_interesting(const char *path)
1368 struct stat st;
1370 if (stat(path, &st) < 0)
1371 lp_crash("failed to stat %s: %s", path, strerror(errno));
1373 lparse_path(path, st.st_size, hash_one_line);
1375 return 0;
1379 __attribute__((__format__(__printf__, 1, 2)))
1380 static void usage(const char *fmt, ...)
1382 if (fmt && *fmt) {
1383 va_list ap;
1385 va_start(ap, fmt);
1386 vfprintf(stdout, fmt, ap);
1387 va_end(ap);
1390 printf("Usage %s [options] [logfiles]\n\n", progname);
1391 printf(" [logfiles] refers to all the nagios logfiles you want to import\n");
1392 printf(" If --nagios-cfg is given or can be inferred no logfiles need to be supplied\n");
1393 printf("\nOptions:\n");
1394 printf(" --help this cruft\n");
1395 printf(" --no-progress don't display progress output\n");
1396 printf(" --no-sql don't access the database\n");
1397 printf(" --db-name database name\n");
1398 printf(" --db-table database table name\n");
1399 printf(" --db-user database user\n");
1400 printf(" --db-pass database password\n");
1401 printf(" --db-host database host\n");
1402 printf(" --db-port database port\n");
1403 printf(" --db-type database type\n");
1404 printf(" --db-conn-str database connection string\n");
1405 printf(" --[no-]repair] should we autorepair tables?\n");
1406 printf(" --incremental[=<when>] do an incremental import (since $when)\n");
1407 printf(" --truncate-db truncate database before importing\n");
1408 printf(" --only-notifications only import notifications\n");
1409 printf(" --nagios-cfg=</path/to/nagios.cfg> path to nagios.cfg\n");
1410 printf(" --list-files list files to import\n");
1411 printf("\n\n");
1413 if (fmt && *fmt)
1414 exit(1);
1416 exit(0);
1419 int main(int argc, char **argv)
1421 int i, truncate_db = 0;
1422 const char *nagios_cfg = NULL;
1423 char *db_name, *db_user, *db_pass;
1424 char *db_conn_str, *db_host, *db_port, *db_type;
1426 progname = strrchr(argv[0], '/');
1427 progname = progname ? progname + 1 : argv[0];
1429 use_database = 1;
1430 db_name = db_user = db_pass = NULL;
1431 db_conn_str = db_host = db_port = db_type = NULL;
1433 do_progress = isatty(fileno(stdout));
1435 strv = calloc(sizeof(char *), MAX_NVECS);
1436 dentry = calloc(sizeof(*dentry), NUM_DENTRIES);
1437 if (!strv || !dentry)
1438 crash("Failed to alloc initial structs");
1441 for (num_nfile = 0,i = 1; i < argc; i++) {
1442 char *opt, *arg = argv[i];
1443 int arg_len, eq_opt = 0;
1445 if ((opt = strchr(arg, '='))) {
1446 *opt++ = '\0';
1447 eq_opt = 1;
1449 else if (i < argc - 1) {
1450 opt = argv[i + 1];
1453 if (!prefixcmp(arg, "-h") || !prefixcmp(arg, "--help")) {
1454 usage(NULL);
1456 if (!prefixcmp(arg, "--incremental")) {
1457 incremental = 1;
1460 * nifty for debugging --incremental skipping log-files
1461 * The value will be overwritten unless --no-sql is also
1462 * in effect
1464 if (eq_opt) {
1465 incremental = strtoul(opt, NULL, 0);
1466 if (!incremental)
1467 usage("--incremental= requires a parameter");
1469 * since we use '1' to mean "determine automatically",
1470 * we magic a '1' from userspace to '2'. In practice,
1471 * this just means the user doesn't need to know a
1472 * thing about this program's internals.
1474 if (incremental == 1)
1475 incremental = 2;
1477 continue;
1479 if (!prefixcmp(arg, "--no-sql")) {
1480 use_database = 0;
1481 continue;
1483 if (!prefixcmp(arg, "--no-repair")) {
1484 repair_table = 0;
1485 continue;
1487 if (!prefixcmp(arg, "--repair")) {
1488 repair_table = 1;
1489 continue;
1491 if (!prefixcmp(arg, "--only-notifications")) {
1492 only_notifications = 1;
1493 db_table = db_table ? db_table : "notification";
1494 continue;
1496 if (!prefixcmp(arg, "--no-progress")) {
1497 do_progress = 0;
1498 continue;
1500 if (!prefixcmp(arg, "--debug") || !prefixcmp(arg, "-d")) {
1501 do_progress = 0;
1502 debug_level++;
1503 continue;
1505 if (!prefixcmp(arg, "--truncate-db")) {
1506 truncate_db = 1;
1507 continue;
1509 if (!prefixcmp(arg, "--list-files")) {
1510 list_files = 1;
1511 do_progress = 0;
1512 continue;
1514 if (!prefixcmp(arg, "--nagios-cfg")) {
1515 if (!opt || !*opt) {
1516 crash("%s requires the path to nagios.cfg as argument", arg);
1518 nagios_cfg = opt;
1519 if (opt && !eq_opt)
1520 i++;
1521 continue;
1523 if (!prefixcmp(arg, "--db-name")) {
1524 if (!opt || !*opt)
1525 crash("%s requires a database name as an argument", arg);
1526 db_name = opt;
1527 if (opt && !eq_opt)
1528 i++;
1529 continue;
1531 if (!prefixcmp(arg, "--db-user")) {
1532 if (!opt || !*opt)
1533 crash("%s requires a database username as argument", arg);
1534 db_user = opt;
1535 if (opt && !eq_opt)
1536 i++;
1537 continue;
1539 if (!prefixcmp(arg, "--db-pass")) {
1540 if (!opt || !*opt)
1541 crash("%s requires a database username as argument", arg);
1542 db_pass = opt;
1543 if (opt && !eq_opt)
1544 i++;
1545 continue;
1547 if (!prefixcmp(arg, "--db-table")) {
1548 if (!opt || !*opt)
1549 crash("%s requires a database table name as argument", arg);
1550 db_table = opt;
1551 if (opt && !eq_opt)
1552 i++;
1553 continue;
1555 if (!prefixcmp(arg, "--db-conn-str")) {
1556 if (!opt || !*opt)
1557 crash("%s requires a connection string as argument", arg);
1558 db_conn_str = opt;
1559 if (opt && !eq_opt)
1560 i++;
1561 continue;
1563 if (!prefixcmp(arg, "--db-host")) {
1564 if (!opt || !*opt)
1565 crash("%s requires a host as argument", arg);
1566 db_host = opt;
1567 if (opt && !eq_opt)
1568 i++;
1569 continue;
1571 if (!prefixcmp(arg, "--db-port")) {
1572 if (!opt || !*opt)
1573 crash("%s requires a port as argument", arg);
1574 db_port = opt;
1575 if (opt && !eq_opt)
1576 i++;
1577 continue;
1579 if (!prefixcmp(arg, "--db-type")) {
1580 if (!opt || !*opt)
1581 crash("%s requires a database type as an argument", arg);
1582 db_type = opt;
1583 if (opt && !eq_opt)
1584 i++;
1585 continue;
1587 if (!prefixcmp(arg, "--interesting") || !prefixcmp(arg, "-i")) {
1588 if (!opt || !*opt)
1589 crash("%s requires a filename as argument", arg);
1590 hash_interesting(opt);
1591 if (opt && !eq_opt)
1592 i++;
1593 continue;
1596 /* non-argument, so treat as a config- or log-file */
1597 arg_len = strlen(arg);
1598 if (arg_len >= 10 && !strcmp(&arg[arg_len - 10], "nagios.cfg")) {
1599 nagios_cfg = arg;
1600 } else {
1601 add_naglog_path(arg);
1605 /* fallback for op5 systems */
1606 if (!nagios_cfg && !num_nfile) {
1607 nagios_cfg = "/opt/monitor/etc/nagios.cfg";
1609 if (nagios_cfg) {
1610 struct cfg_comp *conf;
1611 uint vi;
1613 conf = cfg_parse_file(nagios_cfg);
1614 for (vi = 0; vi < conf->vars; vi++) {
1615 struct cfg_var *v = conf->vlist[vi];
1616 if (!strcmp(v->key, "log_file")) {
1617 add_naglog_path(v->value);
1619 if (!strcmp(v->key, "log_archive_path")) {
1620 add_naglog_path(v->value);
1625 if (!list_files && use_database && (!truncate_db && !incremental)) {
1626 printf("Defaulting to incremental mode\n");
1627 incremental = 1;
1630 if (use_database) {
1631 db_user = db_user ? db_user : "merlin";
1632 db_pass = db_pass ? db_pass : "merlin";
1633 db_type = db_type ? db_type : "mysql";
1634 sql_config("user", db_user);
1635 sql_config("pass", db_pass);
1636 sql_config("type", db_type);
1637 if (db_conn_str) {
1638 sql_config("conn_str", db_conn_str);
1639 } else {
1640 db_name = db_name ? db_name : "merlin";
1641 db_table = db_table ? db_table : "report_data";
1642 db_host = db_host ? db_host : "localhost";
1643 sql_config("database", db_name);
1644 sql_config("host", db_host);
1645 sql_config("port", db_port);
1648 sql_config("commit_interval", "0");
1649 sql_config("commit_queries", "10000");
1651 if (sql_init() < 0) {
1652 crash("sql_init() failed. db=%s, table=%s, user=%s, db msg=[%s]",
1653 db_name, db_table, db_user, sql_error_msg());
1655 if (truncate_db)
1656 sql_query("TRUNCATE %s", db_table);
1658 if (incremental == 1) {
1659 db_wrap_result * result = NULL;
1660 sql_query("SELECT MAX(%s) FROM %s.%s",
1661 only_notifications ? "end_time" : "timestamp",
1662 db_name, db_table);
1664 if (!(result = sql_get_result()))
1665 crash("Failed to get last timestamp: %s\n", sql_error_msg());
1667 * someone might use --incremental with an empty
1668 * database. We shouldn't crash in that case
1670 if (0 == result->api->step(result)) {
1671 /* reminder: incremental is time_t and may be either uint32_t or uint64.
1672 Thus we use an extra int object here to avoid passing an invalid pointer
1673 to (&incremental) on platforms where time_t is not uint32_t.
1675 int32_t inctime = 0;
1676 result->api->get_int32_ndx(result, 0, &inctime);
1677 incremental = inctime;
1679 sql_free_result();
1683 log_grok_var("logfile", "/dev/null");
1684 log_grok_var("log_levels", "warn");
1686 if (!num_nfile)
1687 usage("No files or directories specified, or nagios.cfg not found");
1689 if (log_init() < 0)
1690 crash("log_init() failed");
1692 qsort(nfile, num_nfile, sizeof(*nfile), nfile_cmp);
1694 host_downtime = dkhash_create(HASH_TABLE_SIZE);
1695 service_downtime = dkhash_create(HASH_TABLE_SIZE);
1697 if (state_init() < 0)
1698 crash("Failed to initialize state machinery");
1700 /* go through them once to count the total size for progress output */
1701 for (i = 0; i < num_nfile; i++) {
1702 totsize += nfile[i].size;
1705 if (!list_files) {
1706 gettimeofday(&import_start, NULL);
1707 printf("Importing %s of data from %d files\n",
1708 human_bytes(totsize), num_nfile);
1711 for (i = 0; i < num_nfile; i++) {
1712 struct naglog_file *nf = &nfile[i];
1713 cur_file = nf;
1714 show_progress();
1717 * skip parsing files if they're not interesting, such
1718 * as during incremental imports.
1719 * 'incremental' will be 0 if we're doing a full import,
1720 * 1 if we're doing an incremental but the database is
1721 * empty and will contain the timestamp of the latest
1722 * entry in the database if we're doing an incremental
1723 * import to a populated table.
1724 * Note that we can never skip the last file in the list,
1725 * although the lparse routine should sift through it
1726 * pretty quickly in case it has nothing interesting.
1728 if (i + 1 < num_nfile && incremental > nfile[i + 1].first) {
1729 skipped_files++;
1730 skipped += nf->size;
1731 continue;
1733 if (list_files) {
1734 printf("%s\n", nf->path);
1735 continue;
1737 debug("importing from %s (%lu : %u)\n", nf->path, nf->first, nf->cmp);
1738 line_no = 0;
1739 lparse_path(nf->path, nf->size, parse_one_line);
1740 imported++; /* make up for one lost byte per file */
1743 ltime = time(NULL);
1744 purge_expired_downtime();
1745 end_progress();
1747 if (debug_level) {
1748 if (dt_depth) {
1749 printf("Unclosed host downtimes:\n");
1750 puts("------------------------");
1751 dkhash_walk_data(host_downtime, print_downtime);
1752 printf("Unclosed service downtimes:\n");
1753 puts("---------------------------");
1754 dkhash_walk_data(service_downtime, print_downtime);
1756 printf("dt_depth: %d\n", dt_depth);
1758 printf("purged downtimes: %d\n", purged_downtimes);
1759 printf("max simultaneous host downtime hashes: %u\n",
1760 dkhash_num_entries_max(host_downtime));
1761 printf("max simultaneous service downtime hashes: %u\n",
1762 dkhash_num_entries_max(service_downtime));
1763 printf("max downtime depth: %u\n", max_dt_depth);
1766 if (use_database) {
1767 if (!only_notifications)
1768 insert_extras(); /* must be before indexing */
1769 enable_indexes();
1770 sql_close();
1773 if (warnings && debug_level)
1774 fprintf(stderr, "Total warnings: %d\n", warnings);
1776 if (debug_level || dt_start > dt_stop) {
1777 uint count;
1778 fprintf(stderr, "Downtime data %s\n started: %d\n stopped: %d\n delta : %d\n skipped: %d\n",
1779 dt_depth ? "mismatch!" : "consistent", dt_start, dt_stop, dt_depth, dt_skip);
1780 if ((count = dkhash_num_entries(host_downtime))) {
1781 fprintf(stderr, "host_downtime as %u entries remaining\n", count);
1783 if ((count = dkhash_num_entries(service_downtime))) {
1784 fprintf(stderr, "service_downtime has %u entries remaining\n", count);
1788 print_unhandled_events();
1790 return 0;