db_updater: Put parentheses back
[merlin.git] / daemon.c
blobfd0ddd1381379e4d836bf572e7aa2648bf000d5f
1 #define _GNU_SOURCE 1
2 #include <signal.h>
3 #include "sql.h"
4 #include "daemonize.h"
5 #include "daemon.h"
7 static const char *progname;
8 static const char *pidfile, *merlin_user;
9 static char *import_program;
10 unsigned short default_port = 15551;
11 unsigned int default_addr = 0;
12 static int importer_pid;
13 static merlin_confsync csync;
14 static int num_children;
15 static int killing;
16 static int user_sig;
17 int db_log_reports = 1;
18 int db_log_notifications = 1;
19 int db_track_current = 0;
20 static merlin_nodeinfo merlind;
21 static int merlind_sig;
23 static void usage(char *fmt, ...)
24 __attribute__((format(printf,1,2)));
26 static void usage(char *fmt, ...)
28 if (fmt) {
29 va_list ap;
31 va_start(ap, fmt);
32 vprintf(fmt, ap);
33 va_end(ap);
34 putchar('\n');
37 printf("Usage: %s -c <config-file> [-d] [-h]\n\n", progname);
39 exit(1);
42 void db_mark_node_inactive(merlin_node *node)
44 int node_id;
46 if (!use_database || !db_track_current)
47 return;
49 node_id = node == &ipc ? 0 : node->id + 1;
50 sql_query("UPDATE program_status "
51 "SET is_running = 0 "
52 "WHERE instance_id = %d",
53 node_id);
56 /* node connect/disconnect handlers */
57 static int node_action_handler(merlin_node *node, int prev_state)
59 switch (node->state) {
60 case STATE_PENDING:
61 case STATE_NEGOTIATING:
62 case STATE_NONE:
63 node_disconnect(node, "%s disconnected", node->name);
65 /* only send INACTIVE if we haven't already */
66 if (prev_state == STATE_CONNECTED) {
67 db_mark_node_inactive(node);
68 ldebug("Sending IPC control INACTIVE for '%s'", node->name);
69 return ipc_send_ctrl(CTRL_INACTIVE, node->id);
73 return 1;
76 static int ipc_action_handler(merlin_node *node, int prev_state)
78 uint i;
80 switch (node->state) {
81 case STATE_CONNECTED:
82 if (db_track_current && sql_is_connected(1)) {
83 sql_query("UPDATE program_status SET "
84 "is_running = 1, last_alive = %lu "
85 "WHERE instance_id = 0", time(NULL));
87 break;
89 case STATE_PENDING:
90 case STATE_NEGOTIATING:
91 case STATE_NONE:
92 /* if ipc wasn't connected before, we return early */
93 if (prev_state != STATE_CONNECTED)
94 return 0;
96 /* make sure the gui knows the module isn't running any more */
97 db_mark_node_inactive(&ipc);
99 /* also tell our peers and masters */
100 for (i = 0; i < num_masters + num_peers; i++) {
101 merlin_node *n = node_table[i];
102 node_send_ctrl_inactive(n, CTRL_GENERIC, 100);
106 return 0;
109 static void grok_daemon_compound(struct cfg_comp *comp)
111 uint i;
113 for (i = 0; i < comp->vars; i++) {
114 struct cfg_var *v = comp->vlist[i];
116 if (!strcmp(v->key, "port")) {
117 char *endp;
119 default_port = (unsigned short)strtoul(v->value, &endp, 0);
120 if (default_port < 1 || *endp)
121 cfg_error(comp, v, "Illegal value for port: %s", v->value);
122 continue;
124 if (!strcmp(v->key, "address")) {
125 unsigned int addr;
126 if (inet_pton(AF_INET, v->value, &addr) == 1)
127 default_addr = addr;
128 else
129 cfg_error(comp, v, "Illegal value for address: %s", v->value);
130 continue;
132 if (!strcmp(v->key, "pidfile")) {
133 pidfile = strdup(v->value);
134 continue;
136 if (!strcmp(v->key, "merlin_user")) {
137 merlin_user = strdup(v->value);
138 continue;
140 if (!strcmp(v->key, "import_program")) {
141 import_program = strdup(v->value);
142 continue;
145 if (grok_common_var(comp, v))
146 continue;
147 if (log_grok_var(v->key, v->value))
148 continue;
150 cfg_error(comp, v, "Unknown variable");
153 for (i = 0; i < comp->nested; i++) {
154 struct cfg_comp *c = comp->nest[i];
155 uint vi;
157 if (!prefixcmp(c->name, "database")) {
158 use_database = 1;
159 for (vi = 0; vi < c->vars; vi++) {
160 struct cfg_var *v = c->vlist[vi];
161 if (!strcmp(v->key, "log_report_data")) {
162 db_log_reports = strtobool(v->value);
163 } else if (!prefixcmp(v->key, "log_notification")) {
164 db_log_notifications = strtobool(v->value);
165 } else if (!prefixcmp(v->key, "track_current")) {
166 db_track_current = strtobool(v->value);
167 } else if (!strcmp(v->key, "enabled")) {
168 use_database = strtobool(v->value);
169 } else {
170 sql_config(v->key, v->value);
173 continue;
175 if (!strcmp(c->name, "object_config")) {
176 grok_confsync_compound(c, &csync);
177 continue;
182 /* daemon-specific node manipulation */
183 static void post_process_nodes(void)
185 uint i, x;
187 ldebug("post processing %d masters, %d pollers, %d peers",
188 num_masters, num_pollers, num_peers);
190 for (i = 0; i < num_nodes; i++) {
191 merlin_node *node = node_table[i];
193 if (!node) {
194 lerr("node is null. i is %d. num_nodes is %d. wtf?", i, num_nodes);
195 continue;
198 if (!node->csync.configured && csync.push.cmd) {
199 if (asprintf(&node->csync.push.cmd, "%s %s", csync.push.cmd, node->name) < 0)
200 lerr("CSYNC: Failed to add per-node confsync command for %s", node->name);
201 else
202 ldebug("CSYNC: Adding per-node sync to %s as: %s\n", node->name, node->csync.push.cmd);
205 if (!node->sain.sin_port)
206 node->sain.sin_port = htons(default_port);
208 node->action = node_action_handler;
210 node->ioc = iocache_create(MERLIN_IOC_BUFSIZE);
211 if (node->ioc == NULL) {
212 lerr("Failed to malloc(%i) for io cache for node %s. Aborting",
213 MERLIN_IOC_BUFSIZE, node->name);
217 * this lets us support multiple merlin instances on
218 * a single system, but all instances on the same
219 * system will be marked at the same time, so we skip
220 * them on the second pass here.
222 if (node->flags & MERLIN_NODE_FIXED_SRCPORT) {
223 continue;
226 if (node->sain.sin_addr.s_addr == htonl(INADDR_LOOPBACK)) {
227 node->flags |= MERLIN_NODE_FIXED_SRCPORT;
228 ldebug("Using fixed source-port for local %s node %s",
229 node_type(node), node->name);
230 continue;
232 for (x = i + 1; x < num_nodes; x++) {
233 merlin_node *nx = node_table[x];
234 if (node->sain.sin_addr.s_addr == nx->sain.sin_addr.s_addr) {
235 ldebug("Using fixed source-port for %s node %s",
236 node_type(node), node->name);
237 ldebug("Using fixed source-port for %s node %s",
238 node_type(nx), nx->name);
239 node->flags |= MERLIN_NODE_FIXED_SRCPORT;
240 nx->flags |= MERLIN_NODE_FIXED_SRCPORT;
242 if (node->sain.sin_port == nx->sain.sin_port) {
243 lwarn("Nodes %s and %s have same ip *and* same port. Voodoo?",
244 node->name, nx->name);
251 static int grok_config(char *path)
253 uint i;
254 struct cfg_comp *config;
256 if (!path)
257 return 0;
259 config = cfg_parse_file(path);
260 if (!config)
261 return 0;
263 for (i = 0; i < config->vars; i++) {
264 struct cfg_var *v = config->vlist[i];
266 if (!v->value)
267 cfg_error(config, v, "No value for option '%s'", v->key);
269 if (grok_common_var(config, v))
270 continue;
272 if (!strcmp(v->key, "port")) {
273 default_port = (unsigned short)strtoul(v->value, NULL, 0);
274 continue;
277 cfg_warn(config, v, "Unrecognized variable\n");
280 for (i = 0; i < config->nested; i++) {
281 struct cfg_comp *c = config->nest[i];
283 if (!prefixcmp(c->name, "daemon")) {
284 grok_daemon_compound(c);
285 continue;
290 * if we're supposed to kill a running daemon, ignore
291 * parsing and post-processing nodes. We avoid memory
292 * fragmentation by releasing the config memory before
293 * allocating memory for the nodes.
295 if (!killing) {
296 node_grok_config(config);
298 cfg_destroy_compound(config);
299 if (!killing) {
300 post_process_nodes();
303 return 1;
307 * if the import isn't done yet waitpid() will return 0
308 * and we won't touch importer_pid at all.
310 static void reap_child_process(void)
312 int status, pid;
313 unsigned int i;
315 if (!num_children)
316 return;
318 pid = waitpid(-1, &status, WNOHANG);
319 if (pid < 0) {
320 if (errno == ECHILD) {
321 /* no child running. Just reset */
322 num_children = importer_pid = 0;
323 } else {
324 /* some random error. log it */
325 lerr("waitpid(-1...) failed: %s", strerror(errno));
328 return;
331 /* child may not be done yet */
332 if (!pid)
333 return;
335 /* we reaped an actual child, so decrement the counter */
336 num_children--;
338 /* looks like we reaped some helper we spawned */
339 linfo("Child with pid %d successfully reaped", pid);
341 if (pid == importer_pid) {
342 if (WIFEXITED(status)) {
343 if (!WEXITSTATUS(status)) {
344 linfo("import program finished. Resuming normal operations");
345 } else {
346 lwarn("import program exited with return code %d", WEXITSTATUS(status));
348 } else {
349 lerr("import program stopped or killed. That's a Bad Thing(tm)");
351 /* successfully reaped, so reset and resume */
352 importer_pid = 0;
353 ipc_send_ctrl(CTRL_RESUME, CTRL_GENERIC);
354 return;
357 /* not the importer program, so it must be an oconf push or fetch */
358 for (i = 0; i < num_nodes; i++) {
359 merlin_node *node = node_table[i];
360 if (pid == node->csync.push.pid) {
361 linfo("CSYNC: push finished for %s", node->name);
362 node->csync.push.pid = 0;
363 return;
364 } else if (pid == node->csync.fetch.pid) {
365 linfo("CSYNC: fetch finished from %s", node->name);
366 node->csync.fetch.pid = 0;
367 return;
373 * Run a program, stashing the child pid in *pid.
374 * Since it's not supposed to run all that often, we don't care a
375 * whole lot about performance and lazily run all commands through
376 * /bin/sh for argument handling
378 static void run_program(char *what, char *cmd, int *prog_id)
380 char *args[4] = { "sh", "-c", cmd, NULL };
381 int pid;
383 linfo("Executing %s command '%s'", what, cmd);
384 pid = fork();
385 if (!pid) {
387 * child runs the command. if execvp() returns, that means it
388 * failed horribly and that we're basically screwed
390 execv("/bin/sh", args);
391 lerr("execv() failed: %s", strerror(errno));
392 exit(1);
394 if (pid < 0) {
395 lerr("Skipping %s due to failed fork(): %s", what, strerror(errno));
396 return;
399 * everything went ok, so update prog_id if passed
400 * and increment num_children
402 if (prog_id)
403 *prog_id = pid;
404 num_children++;
408 * import objects and status from objects.cache and status.log,
409 * respecively
411 static int import_objects_and_status(char *cfg, char *cache, char *status)
413 char *cmd;
414 int result = 0;
416 /* don't bother if we're not using a datbase */
417 if (!use_database)
418 return 0;
420 /* ... or if an import is already in progress */
421 if (importer_pid) {
422 lwarn("Import already in progress. Ignoring import event");
423 return 0;
426 if (!import_program) {
427 lerr("No import program specified. Ignoring import event");
428 return 0;
431 asprintf(&cmd, "%s --nagios-cfg='%s' "
432 "--db-type='%s' --db-name='%s' --db-user='%s' --db-pass='%s' --db-host='%s' --db-conn_str='%s'",
433 import_program, cfg,
434 sql_db_type(), sql_db_name(), sql_db_user(), sql_db_pass(), sql_db_host(), sql_db_conn_str());
435 if (cache && *cache) {
436 char *cmd2 = cmd;
437 asprintf(&cmd, "%s --cache='%s'", cmd2, cache);
438 free(cmd2);
440 if (db_track_current && status && *status) {
441 cmd2 = cmd;
442 asprintf(&cmd, "%s --status-log='%s'", cmd2, status);
443 free(cmd2);
447 if (sql_db_port()) {
448 char *cmd2 = cmd;
449 asprintf(&cmd, "%s --db-port='%u'", cmd2, sql_db_port());
450 free(cmd2);
453 run_program("import", cmd, &importer_pid);
454 free(cmd);
457 * If the import program started successfully, we
458 * ask the module to stall events until it's done
460 if (importer_pid > 0) {
461 ipc_send_ctrl(CTRL_STALL, CTRL_GENERIC);
464 return result;
467 /* nagios.cfg, objects.cache (optional) and status.log (optional) */
468 static int read_nagios_paths(merlin_event *pkt)
470 char *nagios_paths_arena;
471 char *npath[3] = { NULL, NULL, NULL };
472 uint i;
473 size_t offset = 0;
475 if (!use_database)
476 return 0;
478 nagios_paths_arena = malloc(pkt->hdr.len);
479 if (!nagios_paths_arena)
480 return -1;
481 memcpy(nagios_paths_arena, pkt->body, pkt->hdr.len);
483 for (i = 0; i < ARRAY_SIZE(npath) && offset < pkt->hdr.len; i++) {
484 npath[i] = nagios_paths_arena + offset;
485 offset += strlen(npath[i]) + 1;
488 import_objects_and_status(npath[0], npath[1], npath[2]);
489 free(nagios_paths_arena);
491 * we don't need to do this until we're merging the reports-module
492 * into merlin
494 /* prime_object_states(&hosts, &services); */
496 return 0;
500 * Compares *node's info struct and returns:
501 * 0 if node's config is same as ours (we should do nothing)
502 * > 0 if node's config is newer than ours (we should fetch)
503 * < 0 if node's config is older than ours (we should push)
505 * If hashes don't match but config is exactly the same
506 * age, we instead return:
507 * > 0 if node started after us (we should fetch)
508 * < 0 if node started before us (we should push)
510 * If all of the above are identical, we return the hash delta.
511 * This should only happen rarely, but it will ensure that not
512 * both sides try to fetch or push at the same time.
514 static int csync_config_cmp(merlin_node *node, int *was_error)
516 int mtime_delta;
517 *was_error =0;
519 ldebug("CSYNC: %s: Comparing config", node->name);
520 if (!ipc.info.last_cfg_change) {
522 * if our module is inactive, we can't know anything so we
523 * can't do anything, and we can't fetch the last config
524 * change time, since it might be being changed as we speak.
526 ldebug("CSYNC: %s: Our module is inactive, so can't check", node->name);
527 *was_error = 1;
528 return 0;
532 * All peers must have identical configuration
534 if (node->type == MODE_PEER) {
535 int hash_delta;
536 hash_delta = memcmp(node->info.config_hash, ipc.info.config_hash, 20);
537 if (!hash_delta) {
538 ldebug("CSYNC: %s: hashes match. No sync required", node->name);
539 return 0;
541 *was_error = 1;
544 /* For non-peers, we simply move on from here. */
545 mtime_delta = node->info.last_cfg_change - ipc.info.last_cfg_change;
546 if (mtime_delta) {
547 ldebug("CSYNC: %s: mtime_delta (%lu - %lu): %d", node->name,
548 node->info.last_cfg_change, ipc.info.last_cfg_change, mtime_delta);
549 return mtime_delta;
553 * Error path. This node is a peer, but we have a hash mismatch
554 * and matching mtimes. Unusual, to say the least. Either way,
555 * we can't really do anything except warn about it and get
556 * on with things. This will only happen when someone manages
557 * to save the config exactly the same second on both nodes.
559 lerr("CSYNC: %s: Can't determine confsync action", node->name);
560 lerr("CSYNC: %s: hash mismatch but mtime matches", node->name);
561 lerr("CSYNC: %s: User intervention required.", node->name);
563 *was_error = 1;
564 return 0;
568 * executed when a node comes online and reports itself as
569 * being active. This is where we run the configuration sync
570 * if any is configured
572 * Note that the 'push' and 'fetch' options in the configuration
573 * are simply guidance names. One could configure them in reverse
574 * if one wanted, or make them boil noodles for the IT staff or
575 * paint a skateboard blue for all Merlin cares. It will just
576 * assume that things work out just fine so long as the config
577 * is (somewhat) in sync.
579 void csync_node_active(merlin_node *node)
581 time_t now;
582 int val = 0, error = 0;
583 merlin_confsync *cs = NULL;
584 merlin_child *child = NULL;
586 ldebug("CSYNC: %s: Checking...", node->name);
587 /* bail early if we have no push/fetch configuration */
588 cs = &node->csync;
589 if (!cs->push.cmd && !cs->fetch.cmd) {
590 ldebug("CSYNC: %s: No config sync configured.", node->name);
591 node_disconnect(node, "Disconnecting from %s, as config can't be synced", node->name);
592 return;
595 val = csync_config_cmp(node, &error);
596 if (val || error)
597 node_disconnect(node, "Disconnecting from %s, as config is out of sync", node->name);
599 if (!val)
600 return;
603 * The most common setup is that configuration is done on a master
604 * node and then pushed to the pollers, so if a master has older
605 * config than we do, a node-specific "push" command is required
606 * to make us push to that master.
607 * This is to prevent normal setup behaviour from engaging in
608 * pingpong action with config-files when config-files take more
609 * than 1 second to generate
611 if (val < 0 && node->type == MODE_MASTER && cs == &csync) {
612 ldebug("CSYNC: Refusing to run global sync to a master node");
613 return;
616 if (val < 0) {
617 if (cs->push.cmd && strcmp(cs->push.cmd, "no")) {
618 child = &cs->push;
619 ldebug("CSYNC: We'll try to push");
620 } else {
621 ldebug("CSYNC: Should have pushed, but push not configured for %s", node->name);
623 if (cs == &csync && !(node->flags & MERLIN_NODE_CONNECT)) {
624 ldebug("CSYNC: %s node %s configured with 'connect = no'. Avoiding global push",
625 node_type(node), node->name);
626 return;
628 } else if (val > 0) {
629 if (cs->fetch.cmd && strcmp(cs->fetch.cmd, "no")) {
630 child = &cs->fetch;
631 ldebug("CSYNC: We'll try to fetch");
632 } else {
633 ldebug("CSYNC: Should have fetched, but fetch not configured for %s", node->name);
637 if (!child) {
638 ldebug("CSYNC: No action required for %s", node->name);
639 return;
642 if (child->pid) {
643 ldebug("CSYNC: '%s' already running for %s, or globally", child->cmd, node->name);
644 return;
647 now = time(NULL);
648 if (node->csync_last_attempt >= now - 30) {
649 ldebug("CSYNC: Config sync attempted %lu seconds ago. Waiting at least %lu seconds",
650 now - node->csync_last_attempt, 30 - (now - node->csync_last_attempt));
651 return;
654 node->csync_num_attempts++;
655 linfo("CSYNC: triggered to %s node %s; val: %d; command: [%s]",
656 node_type(node), node->name, val, child->cmd);
657 node->csync_last_attempt = now;
658 run_program("csync", child->cmd, &child->pid);
659 if (child->pid > 0) {
660 ldebug("CSYNC: command has pid %d", child->pid);
661 } else {
662 child->pid = 0;
667 static int handle_ipc_event(merlin_event *pkt)
669 int result = 0;
671 if (pkt->hdr.type == CTRL_PACKET) {
672 switch (pkt->hdr.code) {
673 case CTRL_PATHS:
674 read_nagios_paths(pkt);
675 return 0;
677 case CTRL_ACTIVE:
678 result = handle_ctrl_active(&ipc, pkt);
679 /* -512 = incorrect number of peers, -256 = incorrect config.
680 * both are fine from IPC, but means we need to make sure all
681 * other nodes are disconnected before continuing
683 if (result == -512 || result == -256) {
684 int i;
685 result = 0;
686 for (i = 0; i < num_nodes; i++) {
687 node_disconnect(node_table[i], "Local config changed, node must reconnect with new config.");
689 } else if (result < 0) {
690 /* ipc is incompatible with us. weird */
691 return 0;
693 node_set_state(&ipc, STATE_CONNECTED, "Connected");
694 break;
696 case CTRL_INACTIVE:
697 /* this should really never happen, but forward it if it does */
698 memset(&ipc.info, 0, sizeof(ipc.info));
699 break;
700 default:
701 lwarn("forwarding control packet %d to the network",
702 pkt->hdr.code);
703 break;
708 * we must send to the network before we run mrm_db_update(),
709 * since the latter deblockifies the packet and makes it
710 * unusable in network transfers without repacking, but only
711 * if this isn't magically marked as a NONET event
713 if (pkt->hdr.code != MAGIC_NONET)
714 result = net_send_ipc_data(pkt);
716 /* skip sending control packets to database */
717 if (use_database && pkt->hdr.type != CTRL_PACKET)
718 result |= mrm_db_update(&ipc, pkt);
720 return result;
723 static int ipc_reap_events(void)
725 int len, events = 0;
726 merlin_event *pkt;
728 node_log_event_count(&ipc, 0);
730 len = node_recv(&ipc);
731 if (len < 0)
732 return len;
734 while ((pkt = node_get_event(&ipc))) {
735 events++;
736 handle_ipc_event(pkt);
739 return 0;
742 static int io_poll_sockets(void)
744 fd_set rd, wr;
745 int sel_val, ipc_listen_sock, nfound;
746 int sockets = 0;
747 struct timeval tv = { 2, 0 };
748 static time_t last_ipc_reinit = 0;
751 * Try re-initializing ipc if the module isn't connected
752 * and it was a while since we tried it.
754 if (ipc.sock < 0 && last_ipc_reinit + 5 < time(NULL)) {
755 ipc_reinit();
756 last_ipc_reinit = time(NULL);
759 ipc_listen_sock = ipc_listen_sock_desc();
760 sel_val = max(ipc.sock, ipc_listen_sock);
762 FD_ZERO(&rd);
763 FD_ZERO(&wr);
764 if (ipc.sock >= 0)
765 FD_SET(ipc.sock, &rd);
766 if (ipc_listen_sock >= 0)
767 FD_SET(ipc_listen_sock, &rd);
769 sel_val = net_polling_helper(&rd, &wr, sel_val);
770 if (sel_val < 0)
771 return 0;
773 nfound = select(sel_val + 1, &rd, &wr, NULL, &tv);
774 if (nfound < 0) {
775 lerr("select() returned %d (errno = %d): %s", nfound, errno, strerror(errno));
776 sleep(1);
777 return -1;
780 if (ipc_listen_sock > 0 && FD_ISSET(ipc_listen_sock, &rd)) {
781 linfo("Accepting inbound connection on ipc socket");
782 ipc_accept();
783 } else if (ipc.sock > 0 && FD_ISSET(ipc.sock, &rd)) {
784 sockets++;
785 ipc_reap_events();
788 sockets += net_handle_polling_results(&rd, &wr);
790 return 0;
793 static void dump_daemon_nodes(void)
795 int fd;
796 unsigned int i;
798 user_sig &= ~(1 << SIGUSR1);
800 fd = open("/tmp/merlind.nodeinfo", O_CREAT | O_TRUNC | O_WRONLY, 0644);
801 if (fd < 0) {
802 lerr("USERSIG: Failed to open /tmp/merlind.nodeinfo for dumping: %s", strerror(errno));
803 return;
806 dump_nodeinfo(&ipc, fd, 0);
807 for (i = 0; i < num_nodes; i++)
808 dump_nodeinfo(node_table[i], fd, i + 1);
811 static void polling_loop(void)
813 for (;!merlind_sig;) {
814 uint i;
815 time_t now = time(NULL);
817 if (user_sig & (1 << SIGUSR1))
818 dump_daemon_nodes();
821 * log the event count. The marker to prevent us from
822 * spamming the logs is in log_event_count() in logging.c
824 ipc_log_event_count();
826 /* reap any children that might have finished */
827 reap_child_process();
830 * reap_child_process() resets importer_pid if
831 * the import is completed.
832 * if it's not and at tops 5 seconds have passed,
833 * ask for some more time.
835 if (importer_pid && !(now % 5)) {
836 ipc_send_ctrl(CTRL_STALL, CTRL_GENERIC);
839 /* When the module is disconnected, we can't validate handshakes,
840 * so any negotiation would need to be redone after the module
841 * has started. Don't even bother.
843 if (ipc.state == STATE_CONNECTED) {
844 while (!merlind_sig && net_accept_one() >= 0)
845 ; /* nothing */
847 for (i = 0; !merlind_sig && i < num_nodes; i++) {
848 merlin_node *node = node_table[i];
849 /* try connecting if we're not already */
850 if (!net_is_connected(node) && node->state == STATE_NONE) {
851 net_try_connect(node);
856 if (merlind_sig)
857 return;
860 * io_poll_sockets() is the real worker. It handles network
861 * and ipc based IO and ships inbound events off to their
862 * right destination.
864 io_poll_sockets();
866 if (merlind_sig)
867 return;
870 * Try to commit any outstanding queries
872 sql_try_commit(0);
/*
 * Shut down ipc, sql, networking and the daemon bits, then exit.
 * The exit status is EXIT_SUCCESS when sig is 0, SIGINT or SIGTERM
 * and EXIT_FAILURE for any other value.
 */
static void clean_exit(int sig)
{
	int exit_code = EXIT_FAILURE;

	if (sig != 0)
		lwarn("Caught signal %d. Shutting down", sig);

	ipc_deinit();
	sql_close();
	net_deinit();
	daemon_shutdown();

	switch (sig) {
	case 0:
	case SIGINT:
	case SIGTERM:
		exit_code = EXIT_SUCCESS;
		break;
	default:
		break;
	}

	exit(exit_code);
}
893 static void merlind_sighandler(int sig)
895 merlind_sig = sig;
898 static void sigusr_handler(int sig)
900 user_sig |= 1 << sig;
903 int merlind_main(int argc, char **argv)
905 int i, result, status = 0;
907 progname = strrchr(argv[0], '/');
908 progname = progname ? progname + 1 : argv[0];
910 is_module = 0;
911 self = &merlind;
912 ipc_init_struct();
913 gettimeofday(&merlind.start, NULL);
917 * Solaris doesn't support MSG_NOSIGNAL, so
918 * we ignore SIGPIPE globally instead
920 signal(SIGPIPE, SIG_IGN);
922 for (i = 1; i < argc; i++) {
923 char *opt, *arg = argv[i];
925 if (*arg != '-') {
926 if (!merlin_config_file) {
927 merlin_config_file = arg;
928 continue;
930 goto unknown_argument;
933 if (!strcmp(arg, "-h") || !strcmp(arg, "--help"))
934 usage(NULL);
935 if (!strcmp(arg, "-k") || !strcmp(arg, "--kill")) {
936 killing = 1;
937 continue;
939 if (!strcmp(arg, "-d") || !strcmp(arg, "--debug")) {
940 debug++;
941 continue;
943 if (!strcmp(arg, "-s")) {
944 status = 1;
945 continue;
948 if ((opt = strchr(arg, '=')))
949 opt++;
950 else if (i < argc - 1)
951 opt = argv[i + 1];
952 else
953 usage("Unknown argument, or argument '%s' requires a parameter", arg);
955 i++;
956 if (!strcmp(arg, "--config") || !strcmp(arg, "-c")) {
957 merlin_config_file = opt;
958 continue;
960 unknown_argument:
961 usage("Unknown argument: %s", arg);
964 if (!merlin_config_file)
965 usage("No config-file specified\n");
967 merlin_config_file = nspath_absolute(merlin_config_file, NULL);
968 if (!grok_config(merlin_config_file)) {
969 fprintf(stderr, "%s contains errors. Bailing out\n", merlin_config_file);
970 return 1;
973 if (!pidfile)
974 pidfile = "/var/run/merlin.pid";
976 if (killing)
977 return kill_daemon(pidfile);
979 if (status)
980 return daemon_status(pidfile);
982 if (use_database && !import_program) {
983 lwarn("Using database, but no import program configured. Are you sure about this?");
984 lwarn("If not, make sure you specify the import_program directive in");
985 lwarn("the \"daemon\" section of your merlin configuration file");
988 ipc.action = ipc_action_handler;
989 result = ipc_init();
990 if (result < 0) {
991 printf("Failed to initalize ipc socket: %s\n", strerror(errno));
992 return 1;
994 if (net_init() < 0) {
995 printf("Failed to initialize networking: %s\n", strerror(errno));
996 return 1;
999 if (!debug) {
1000 if (daemonize(merlin_user, NULL, pidfile, 0) < 0)
1001 exit(EXIT_FAILURE);
1004 * we'll leak these file-descriptors, but that
1005 * doesn't really matter as we just want accidental
1006 * output to go somewhere where it'll be ignored
1008 fclose(stdin);
1009 open("/dev/null", O_RDONLY);
1010 fclose(stdout);
1011 open("/dev/null", O_WRONLY);
1012 fclose(stderr);
1013 open("/dev/null", O_WRONLY);
1016 signal(SIGINT, merlind_sighandler);
1017 signal(SIGTERM, merlind_sighandler);
1018 signal(SIGUSR1, sigusr_handler);
1019 signal(SIGUSR2, sigusr_handler);
1021 sql_init();
1022 if (use_database && db_track_current) {
1023 sql_query("TRUNCATE TABLE program_status");
1024 sql_query("INSERT INTO program_status(instance_id, instance_name, is_running) "
1025 "VALUES(0, 'Local Nagios daemon', 0)");
1026 for (i = 0; i < (int)num_nodes; i++) {
1027 char *node_name;
1028 merlin_node *node = noc_table[i];
1030 sql_quote(node->name, &node_name);
1031 sql_query("INSERT INTO program_status(instance_id, instance_name, is_running) "
1032 "VALUES(%d, %s, 0)", node->id + 1, node_name);
1033 safe_free(node_name);
1036 state_init();
1037 linfo("Merlin daemon %s successfully initialized", merlin_version);
1038 polling_loop();
1040 clean_exit(0);
1042 return 0;