Fix various bugs; see ChangeLog
[clumanager.git] / src / daemons / clusvcmgrd.c
blob fc094c889c8a27a5ddb37a30d06e86b049d93092
1 /*
2 Copyright Red Hat, Inc. 2002-2003
4 This program is free software; you can redistribute it and/or modify it
5 under the terms of the GNU General Public License as published by the
6 Free Software Foundation; either version 2, or (at your option) any
7 later version.
9 This program is distributed in the hope that it will be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; see the file COPYING. If not, write to the
16 Free Software Foundation, Inc., 675 Mass Ave, Cambridge,
17 MA 02139, USA.
19 /** @file
20 * Service Manager for RHCM. This is the 1.0.x service manager with
21 * extras to make it multi-node capable.
23 * Author: Brian Stevens (bstevens at redhat.com)
24 * Lon Hohberger (lhh at redhat.com)
28 /*static const char *version __attribute__ ((unused)) = "$Revision: 1.71 $";*/
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <unistd.h>
33 #include <getopt.h>
34 #include <signal.h>
35 #include <string.h>
36 #include <errno.h>
37 #include <sys/types.h>
38 #include <sys/param.h>
39 #include <sys/wait.h>
40 #include <sys/syslog.h>
41 #include <sys/time.h>
42 #include <linux/reboot.h>
43 #include <sys/reboot.h>
44 #include <sched.h>
45 #include <clusterdefs.h>
46 #include <xmlwrap.h>
47 #include <clu_lock.h>
48 #include <msgsvc.h>
49 #include <svcmgr.h>
50 #include <clulog.h>
51 #include <quorum.h>
52 #include <clushared.h>
53 #include <sharedstate.h>
54 #include <namespace.h>
55 #include <findproc.h>
57 #ifdef TESTING
58 #ifdef reboot
59 #undef reboot
60 #endif
61 #define reboot(arg) {\
62 clulog(LOG_EMERG, "reboot(%s) @ %s:%d\n", #arg, __FILE__, __LINE__); \
63 raise(SIGSTOP); \
65 #endif
67 #define svc_fail(x) \
68 do { \
69 clulog(LOG_DEBUG, "Service %d failed @ %s:%d\n",\
70 x, __FILE__, __LINE__); \
71 _svc_fail(x); \
72 } while(0)
74 #define HEARTBEAT_INTERVAL 60
75 #define CHECK_INTERVAL 5
76 #define MSG_TIMEOUT 10
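/* Flags passed to svc_start()/svc_stop() and the service request handlers below */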
78 #define SVCF_START_DISABLED 1
79 #define SVCF_PENDING 2
80 #define SVCF_RESTART 4
81 #define SVCF_CLEAR_FAILURES 8
82 #define SVCF_RESTARTFAILED 16
84 static int myNodeID;
85 static int myNodeState;
86 static int services_locked = 0;
87 #ifdef OLD_CLU_ALIAS
88 static int alias_owner = -1;
89 #endif
90 static char *myNodeName = NULL;
91 static int ticks[MAX_SERVICES];
92 static memb_mask_t membership, mask_panic;
93 static int sighup_received = 0, sigterm_received = 0;
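/*
 * Per-service child bookkeeping: cs_pid is the PID of the child process
 * currently acting on the service (0 if none), and cs_rq records which
 * request that child is handling.
 */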
95 struct child_sm {
96 pid_t cs_pid;
97 int cs_rq;
100 static struct child_sm svc_children[MAX_SERVICES];
103 * from clusvcmgrd_cfg.c:
105 int check_config_file(void);
106 int check_config_data(void);
107 int rebuild_config_lockless(void);
108 int rebuild_config(void);
109 int handle_config_update(memb_mask_t mask, int my_node_id);
110 int boot_config_init(void);
114 * Service action string table
116 static char *serviceActionStrings[] = {
117 SVC_NONE_STR,
118 SVC_ADD_STR,
119 SVC_REMOVE_STR,
120 SVC_START_STR,
121 SVC_STOP_STR,
122 SVC_CHECK_STR,
123 SVC_DISABLE_STR,
124 SVC_RELOCATE_STR,
125 SVC_STATUS_INQUIRY_STR,
126 SVC_FAILBACK_STR,
127 SVC_START_PENDING_STR,
128 SVC_START_RELOCATE_STR,
129 "reconfigure", /* XXX */
130 SVC_RESTART_STR
133 extern void daemon_init(char *);
135 static int svc_stop(int, int);
136 static int svc_stop_unclean(int);
137 static int _svc_fail(int svcID);
139 #ifdef OLD_CLU_ALIAS
140 static int clu_alias(int);
141 #endif
143 static int request_failback(int);
144 static int failback(int);
146 static int relocate_service(int svcID, int request, int target);
147 static void handle_svc_request(int, int, int, msg_handle_t);
149 int svc_report_failure(int svcID);
150 int setServiceStatus(ServiceBlock *svcblk);
151 int getServiceStatus(int svcNum, ServiceBlock *svcblk);
152 int removeService(int svcNum);
156 * Block the given signal.
158 * @param sig Signal to block.
159 * @return See man sigprocmask.
161 static int
162 block_signal(int sig)
164 sigset_t set;
166 sigemptyset(&set);
167 sigaddset(&set, sig);
169 return(sigprocmask(SIG_BLOCK, &set, NULL));
174 * Unblock the given signal.
176 * @param sig Signal to unblock.
177 * @return See man sigprocmask.
179 static int
180 unblock_signal(int sig)
182 sigset_t set;
184 sigemptyset(&set);
185 sigaddset(&set, sig);
187 return(sigprocmask(SIG_UNBLOCK, &set, NULL));
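/**
 * Read cluster%logfacility from the cluster configuration and, if it is
 * set, apply it to our logging via clu_set_facility().
 */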
191 static void
192 set_facility(void)
194 char *p;
195 if (CFG_Get((char *) "cluster%logfacility", NULL, &p) == CFG_OK) {
196 if (p)
197 clu_set_facility(p);
203 * Send a SVC_FAILBACK request to the given partner member.
205 * @param partner Partner we are sending request to.
206 * @return FAIL or SUCCESS
207 * @see failback
210 request_failback(int partner)
212 msg_handle_t fd_failback;
213 SmMessageSt msg_failback;
215 if (partner == myNodeID)
216 return SUCCESS;
219 * Fork here to avoid deadlock.
221 switch(fork()) {
222 case 0:
223 break;
224 case -1:
225 return FAIL;
226 default:
227 return SUCCESS;
230 msg_failback.sm_hdr.gh_magic = GENERIC_HDR_MAGIC;
231 msg_failback.sm_hdr.gh_command = SVC_ACTION_REQUEST;
232 msg_failback.sm_hdr.gh_length = sizeof (SmMessageSt);
233 msg_failback.sm_data.d_action = SVC_FAILBACK;
234 msg_failback.sm_data.d_svcOwner = myNodeID;
235 msg_failback.sm_data.d_ret = 0;
237 if ((fd_failback = msg_open(PROCID_CLUSVCMGRD, partner)) < 0) {
238 clulog(LOG_DEBUG, "Failed opening connection to svcmgrd\n");
239 exit(1);
242 /* Encode */
243 swab_SmMessageSt(&msg_failback);
245 if (msg_send(fd_failback, &msg_failback, sizeof (SmMessageSt)) !=
246 sizeof (SmMessageSt)) {
247 msg_close(fd_failback);
248 clulog(LOG_ERR, "Error sending failback request.\n");
249 exit(1);
251 msg_close(fd_failback);
252 exit(0);
257 * Handle SVC_FAILBACK from a given node. This shuts down services which
258 * should be running on 'target' instead of 'myNodeID'. Takes into account
259 * service failover domain and preferred node ordering. Services without
260 * a failover domain will never be sent to the requesting node.
262 * @param target Requestor which sent us SVC_FAILBACK
263 * @return SUCCESS
264 * @see request_failback
267 failback(int target)
269 int svcID;
271 for (svcID = 0; svcID < MAX_SERVICES; svcID++) {
272 if (serviceExists(svcID) != YES)
273 continue;
275 if (!svc_has_domain(svcID))
276 continue;
279 * If the service has a failover domain, and the requestor
280 * should run it and I shouldn't, then I will give the
281 * service back.
283 * This relies on handle_svc_request to determine the
284 * state of the service.
286 if (node_should_start(myNodeID, membership, svcID) <
287 node_should_start(target, membership, svcID))
288 handle_svc_request(svcID, SVC_RELOCATE, target, -1);
291 return SUCCESS;
296 * See if a child process operating on a specified service has exited.
298 * @param svcID Service ID whose child we are checking.
299 * @return 0 indicates that no child has exited. 1 indicates
300 * that the child for the service has, indeed, been
301 * cleaned up.
304 cleanup_child(int svcID)
306 /* Obvious check: is there even a child for this service? */
307 if (!svc_children[svcID].cs_pid)
308 return 1;
309 if (waitpid(svc_children[svcID].cs_pid, NULL, WNOHANG) != -1)
310 return 0;
311 if (errno != ECHILD)
312 return 0;
314 svc_children[svcID].cs_pid = 0;
315 return 1;
320 * Clean up children. This is our SIGCHLD handler.
322 void
323 reap_zombies(int __attribute__ ((unused)) sig)
325 int svcID;
326 int pid;
327 //int nchildren = 0;
328 int status;
330 while ((pid = waitpid(-1, &status, WNOHANG)) != 0) {
331 if (pid < 0) {
332 if (errno == EINTR)
333 continue;
334 break; /* No children */
337 /*clulog(LOG_DEBUG, "waitpid reaped %d\n", pid);*/
338 for (svcID = 0; svcID < MAX_SERVICES; svcID++) {
339 if (pid == svc_children[svcID].cs_pid) {
340 svc_children[svcID].cs_pid = 0;
341 //nchildren++;
345 //return (nchildren);
350 * Clean up services and exit.
352 * @param status Return value passed up to parent process.
353 * @param clean This is set to '1' when we're cleanly shutting down
354 * and we have quorum. Without quorum or during an
355 * unclean shutdown, this is 0.
356 * @return If it returns, that's BAD
358 static void
359 svcmgr_exit(int status, int clean)
361 int svcID, fd;
363 #ifdef OLD_CLU_ALIAS
364 clu_alias(0);
365 #endif
367 for (svcID = 0; svcID < MAX_SERVICES; svcID++) {
369 if (serviceExists(svcID) != YES)
370 continue;
372 /* Wait for child process acting on this service to exit */
373 while (!cleanup_child(svcID))
374 sleep(5);
376 if (clean) {
377 switch(svc_stop(svcID, 0)) {
378 case FAIL:
379 svc_fail(svcID);
380 svc_report_failure(svcID);
381 break;
382 case ABORT:
383 /* Lock failure during shutdown == switch to
384 unclean mode */
385 clulog(LOG_ERR, "Failed to acquire cluster lock "
386 "during shutdown\n");
388 clean = 0;
389 break;
390 case SUCCESS:
391 default:
392 continue;
396 /* Succeed || Die */
397 svc_stop_unclean(svcID);
401 * Tell the quorum daemon that we are leaving
403 clulog(LOG_DEBUG, "Sending message to quorumd that we are exiting\n");
405 if ((fd = msg_open(PROCID_CLUQUORUMD, myNodeID)) < 0) {
406 clulog(LOG_ERR, "msg_open failed to quorum daemon\n");
407 exit(status);
410 if (msg_send_simple(fd, QUORUM_EXIT, status, 0) == -1) {
411 clulog(LOG_ERR, "Failed sending exit message to cluquorumd\n");
412 msg_close(fd);
413 exit(status);
416 msg_close(fd);
417 clulog(LOG_INFO, "Exiting\n");
418 exit(status);
423 * NOTE: If someone kills the service manager during start, it's possible to
424 * have a service script still running the stop phase. This is OKAY!
425 * This is our SIGTERM handler.
427 * @see svcmgr_exit
429 static void
430 sigterm_handler(void)
432 block_signal(SIGHUP);
433 block_signal(SIGTERM);
434 sigterm_received = 1;
439 * Retrieve our log level from the cluster database and set it accordingly.
441 static void
442 set_loglevel(void)
444 int level;
446 if (getSvcMgrLogLevel(&level) == FAIL) {
447 clulog(LOG_ERR,
448 "Failed getting log level from config database\n");
449 return;
452 if (clu_set_loglevel(level) == -1) {
453 clulog(LOG_ERR, "Failed setting log level\n");
459 * Notify the local daemons that the on-disk configuration has changed, and
460 * so needs to be reread.
462 static void
463 notify_everybody(void)
466 * Notify local daemons of the configuration update...
468 killall("clumembd", SIGHUP);
469 killall("cluquorumd", SIGHUP);
470 killall("clulockd", SIGHUP);
471 killall("clurmtabd", SIGHUP);
476 * Handle an updated configuration. This is called after we receive a SIGHUP.
478 * @see sighup_handler
480 void
481 update_config(void)
483 int really_updated = 1;
485 block_signal(SIGHUP);
486 /* XXX check for return code?? */
487 /* We reload the msg service stuff inside handle_config_update */
488 really_updated = handle_config_update(membership, myNodeID);
490 if (really_updated == 0) {
491 set_loglevel();
492 notify_everybody();
496 * If we fail to update, the other service managers will reread
497 * the shared config in a few seconds anyway.
499 unblock_signal(SIGHUP);
504 * When we receive SIGHUP, we set the global flag; update_config is
505 * called shortly afterward.
507 * @see update_config
509 void
510 sighup_handler(int __attribute__ ((unused)) sig)
512 sighup_received = 1;
517 * Run the service script for a given service. The service scripts handle
518 * the real meat of starting/stopping services.
520 * @param action The action to take (ie, start/stop/status)
521 * @param svcID The service ID we intend to take 'action' on.
522 * @param block Set to 0 if the service script should run in the
523 * background, 1 if we should wait for it to complete
524 * before continuing.
525 * @param ret The return code of the service script.
526 * @return SUCCESS or FAIL.
528 static int
529 exec_service_script(char *action, int svcID, int block, int *ret)
531 int pid;
532 char svcIDstr[8];
533 char *svcName;
534 int local_ret = 0;
535 sigset_t set;
536 struct sched_param param;
538 getSvcName(svcID, &svcName);
540 clulog(LOG_DEBUG, "Exec of script %s, action %s, service %s\n",
541 SVC_ACTION_SCRIPT, action, svcName);
543 pid = fork();
544 if (pid < 0) {
545 clulog(LOG_ERR, "fork failed: %s", strerror(errno));
546 return FAIL;
548 if (pid) {
550 if (ret)
551 *ret = 0;
553 if (block) {
554 do {
555 pid = waitpid(pid, &local_ret, 0);
556 if (pid < 0) {
557 if (errno == EINTR)
558 continue;
559 clulog(LOG_DEBUG,
560 "waitpid: %s",
561 strerror(errno));
562 /* Fake it. */
563 local_ret = 0;
565 } while (0);
567 clulog(LOG_DEBUG,
568 "Exec of script for service %s returned %d\n",
569 svcName, local_ret);
570 if (ret)
571 *ret = local_ret;
573 return SUCCESS;
577 * We need to set sched_priority back to normal in case clusvcmgrd
578 * is running at a different priority because cluquorumd%rtp is set.
580 if (sched_getscheduler(0) != SCHED_OTHER) {
581 memset(&param,0,sizeof(param));
582 param.sched_priority = 0;
583 if (sched_setscheduler(0, SCHED_OTHER, (void *)&param) != 0)
584 clulog(LOG_WARNING, "Setting child to normal priority "
585 "failed: %s\n", strerror(errno));
586 else
587 clulog(LOG_DEBUG, "Using normal priority\n");
590 /* lhh - Unblock signals so the user script doesn't break */
591 sigfillset(&set);
592 if (sigprocmask(SIG_UNBLOCK, &set, NULL) != 0) {
593 clulog(LOG_WARNING, "Failed to unblock signals: %s\n",
594 strerror(errno));
597 snprintf(svcIDstr, sizeof (svcIDstr), "%d", svcID);
598 local_ret =
599 execl(SVC_ACTION_SCRIPT, SVC_ACTION_SCRIPT, action, svcIDstr, NULL);
601 clulog(LOG_DEBUG, "Exec failed of %s, action %s, service %s, err %s\n",
602 SVC_ACTION_SCRIPT, action, svcName, strerror(errno));
604 exit(local_ret);
608 #ifdef OLD_CLU_ALIAS
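/*
 * Start (req != 0) or stop (req == 0) the cluster alias by running
 * CLU_ALIAS_SCRIPT; alias_owner tracks whether we currently own it.
 */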
609 static int
610 clu_alias(int req)
612 int pid;
613 int local_ret;
615 if (req) {
616 if (alias_owner == myNodeID)
617 return SUCCESS;
618 alias_owner = myNodeID;
619 clulog(LOG_DEBUG, "Start cluster alias request\n");
620 } else {
621 if (alias_owner != myNodeID)
622 return SUCCESS;
623 alias_owner = -1;
624 clulog(LOG_DEBUG, "Stop cluster alias request\n");
627 pid = fork();
628 if (pid < 0) {
629 clulog(LOG_ERR, "fork failed: %s", strerror(errno));
630 return FAIL;
632 if (pid) {
633 do {
634 pid = waitpid(pid, &local_ret, 0);
635 if ((pid < 0) && (errno == EINTR))
636 continue;
637 } while (0);
639 clulog(LOG_DEBUG, "Exec of alias script returned %d\n",
640 local_ret);
641 return local_ret;
644 block_signal(SIGTERM);
645 block_signal(SIGHUP);
647 if (req)
648 local_ret =
649 execl(CLU_ALIAS_SCRIPT, CLU_ALIAS_SCRIPT, "start", NULL);
650 else
651 local_ret =
652 execl(CLU_ALIAS_SCRIPT, CLU_ALIAS_SCRIPT, "stop", NULL);
654 clulog(LOG_DEBUG, "Exec failed of %s, err %s\n", CLU_ALIAS_SCRIPT,
655 strerror(errno));
657 exit(local_ret);
659 #endif
663 * Initialize an on-disk service block.
665 * @param svcID Service ID whose block we need to update.
666 * @return FAIL or SUCCESS.
669 svc_add(int svcID)
671 ServiceBlock svcStatus;
673 clulog(LOG_DEBUG, "Initializing service #%d\n", svcID);
676 * Make sure the service does not exist
679 if (clu_svc_lock(svcID) == -1) {
680 clulog(LOG_ERR, "Unable to obtain cluster lock: %s\n",
681 strerror(errno));
682 return FAIL;
685 if (getServiceStatus(svcID, &svcStatus) == SUCCESS) {
686 clulog(LOG_ERR,
687 "Service #%d already exists!\n",
688 svcID);
689 clu_svc_unlock(svcID);
690 return FAIL;
693 svcStatus.sb_id = svcID;
694 svcStatus.sb_owner = NODE_ID_NONE;
695 svcStatus.sb_last_owner = NODE_ID_NONE;
696 svcStatus.sb_state = SVC_DISABLED;
697 svcStatus.sb_transition = (uint64_t)time(NULL);
698 svcStatus.sb_restarts = 0;
700 if (setServiceStatus(&svcStatus) != SUCCESS) {
701 (void) removeService(svcID);
702 clu_svc_unlock(svcID);
703 return FAIL;
706 clu_svc_unlock(svcID);
707 return SUCCESS;
712 * Set an on-disk service block's state to UNINITIALIZED.
714 * @param svcID Service ID whose block we need to update.
715 * @return FAIL or SUCCESS.
718 svc_remove(int svcID)
720 clulog(LOG_DEBUG, "Removing service #%d from database\n", svcID);
722 if (clu_svc_lock(svcID) == -1) {
723 clulog(LOG_ERR, "Unable to obtain cluster lock: %s\n",
724 strerror(errno));
725 return FAIL;
728 if (removeService(svcID) != 0) {
729 clulog(LOG_ERR, "Failed removing service %d from database\n",
730 svcID);
731 clu_svc_unlock(svcID);
732 return FAIL;
735 clu_svc_unlock(svcID);
736 return SUCCESS;
741 * Advise service manager as to whether or not to start a service, given
742 * that we already know it's legal to run the service.
744 * @param svcStatus Current service status.
745 * @param svcName Service name
746 * @param flags Specify whether or not it's legal to start a
747 * disabled service, etc.
748 * @return 0 = DO NOT start service, return FAIL
749 * 1 = START service - return whatever it returns.
750 * 2 = DO NOT start service, return SUCCESS
753 svc_advise_start(ServiceBlock *svcStatus, char *svcName, int flags)
755 char *nodeName;
757 switch(svcStatus->sb_state) {
758 case SVC_FAILED:
759 clulog(LOG_ERR, "Service %s has failed on all applicable "
760 "members; can not start.\n", svcName);
761 return 0;
763 case SVC_STARTED:
764 case SVC_STOPPING:
765 getNodeName(svcStatus->sb_owner, &nodeName);
766 if ((svcStatus->sb_owner == myNodeID) ||
767 (memb_online(membership, svcStatus->sb_owner)==1) ||
768 (memb_online(mask_panic, svcStatus->sb_owner)==1)) {
770 * Service is running and the owner is online!
772 clulog(LOG_DEBUG,
773 "Service is running on member %s.\n",
774 nodeName);
775 return 2;
779 * Service is running but owner is down -> FAILOVER
781 clulog(LOG_NOTICE,
782 "Taking over service %s from down member %s\n",
783 svcName, nodeName);
784 return 1;
786 case SVC_PENDING:
788 * Starting failed service...
790 if (flags & SVCF_PENDING) {
791 clulog(LOG_NOTICE, "Starting failed service %s\n",
792 svcName);
793 svcStatus->sb_state = SVC_STOPPED;
794 /* Start! */
795 return 1;
798 /* Don't start, but return success. */
799 clulog(LOG_DEBUG,
800 "Not starting %s: pending/transitional state\n",
801 svcName);
802 return 2;
804 case SVC_STOPPED:
805 clulog(LOG_NOTICE, "Starting stopped service %s\n", svcName);
806 return 1;
808 case SVC_DISABLED:
809 case SVC_UNINITIALIZED:
810 if (flags & SVCF_START_DISABLED) {
811 clulog(LOG_NOTICE, "Starting disabled service %s\n",
812 svcName);
813 return 1;
816 clulog(LOG_DEBUG, "Not starting disabled service %s\n",
817 svcName);
818 return 0;
820 default:
821 clulog(LOG_ERR,
822 "Cannot start service %s: Invalid State %d\n",
823 svcName, svcStatus->sb_state);
824 return 0;
830 * Start a cluster service.
832 * @param svcID Service ID to start.
833 * @param flags Service-operation specific flags to take into account.
834 * @see svc_advise_start
835 * @return FAIL, SUCCESS
837 static int
838 svc_start(int svcID, int flags)
840 char *svcName;
841 ServiceBlock svcStatus;
842 int ret;
844 getSvcName(svcID, &svcName);
845 clulog(LOG_DEBUG, "Handling start request for service %s\n", svcName);
847 if (clu_svc_lock(svcID) == -1) {
848 clulog(LOG_ERR, "Unable to obtain cluster lock: %s\n",
849 strerror(errno));
850 return FAIL;
853 if (getServiceStatus(svcID, &svcStatus) != SUCCESS) {
854 clu_svc_unlock(svcID);
855 clulog(LOG_ERR, "Failed getting status for service %s\n",
856 svcName);
857 return FAIL;
860 /* LOCK HELD */
861 switch (svc_advise_start(&svcStatus, svcName, flags)) {
862 case 0: /* Don't start service, return FAIL */
863 clu_svc_unlock(svcID);
864 return FAIL;
865 case 1: /* Start service. */
866 break;
867 case 2: /* Don't start service, return SUCCESS */
868 clu_svc_unlock(svcID);
869 return SUCCESS;
871 default:
872 break;
875 /* LOCK HELD if we get here */
876 #if 0
877 if (flags & SVCF_CLEAR_FAILURES)
878 memset(svcStatus.sb_failed_mask, 0, sizeof(memb_mask_t));
879 #endif
881 svcStatus.sb_owner = myNodeID;
882 svcStatus.sb_state = SVC_STARTED;
883 svcStatus.sb_transition = (uint64_t)time(NULL);
884 svcStatus.sb_checks = (uint16_t)0;
886 if (flags & (SVCF_START_DISABLED|SVCF_PENDING))
887 svcStatus.sb_false_starts = (uint16_t)0;
889 if (flags & SVCF_RESTARTFAILED)
890 svcStatus.sb_restarts++;
891 else
892 svcStatus.sb_restarts = 0;
894 if (setServiceStatus(&svcStatus) != SUCCESS) {
895 clulog(LOG_ERR, "Failed changing service status\n");
896 clu_svc_unlock(svcID);
897 return FAIL;
900 clu_svc_unlock(svcID);
902 if ((exec_service_script(SVC_START_STR, svcID, 1, &ret) != SUCCESS) ||
903 (ret)) {
904 return FAIL;
907 return SUCCESS;
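/**
 * Update a service's on-disk status block under the cluster lock. If
 * last_owner_flip is set, the current owner is recorded in sb_last_owner
 * and sb_owner is cleared.
 *
 * @param svcName Service name (used for error messages).
 * @param svcID Service ID whose status block we update.
 * @param state New state to record.
 * @param last_owner_flip Nonzero to rotate sb_owner into sb_last_owner.
 * @return SUCCESS, FAIL, or ABORT (cluster lock unavailable).
 */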
911 static int
912 flip_state(char *svcName, int svcID, int state, int last_owner_flip)
914 ServiceBlock svcStatus;
916 if (clu_svc_lock(svcID) == -1) {
917 clulog(LOG_ERR, "Unable to obtain cluster lock: %s\n",
918 strerror(errno));
919 return ABORT;
922 if (getServiceStatus(svcID, &svcStatus) != SUCCESS) {
923 clu_svc_unlock(svcID);
924 clulog(LOG_ERR, "Failed getting status for service %s\n",
925 svcName);
926 return FAIL;
929 if (last_owner_flip) {
930 svcStatus.sb_last_owner = svcStatus.sb_owner;
931 svcStatus.sb_owner = NODE_ID_NONE;
933 svcStatus.sb_state = state;
934 svcStatus.sb_transition = (uint64_t)time(NULL);
935 if (setServiceStatus(&svcStatus) != SUCCESS) {
936 clu_svc_unlock(svcID);
937 clulog(LOG_ERR, "Failed changing service status\n");
938 return FAIL;
940 clu_svc_unlock(svcID);
941 return SUCCESS;
947 * Stop a cluster service.
949 * @param svcID Service ID to stop.
950 * @param flags Service-operation specific flags to take into account.
951 * @see svc_advise_start
952 * @return FAIL, SUCCESS
954 static int
955 svc_stop(int svcID, int flags)
957 ServiceBlock svcStatus;
958 char *svcName;
959 int ret;
961 getSvcName(svcID, &svcName);
962 clulog(LOG_DEBUG, "Handling stop request for service %s\n", svcName);
964 if (clu_svc_lock(svcID) == -1) {
965 clulog(LOG_ERR, "Unable to obtain cluster lock: %s\n",
966 strerror(errno));
967 return ABORT;
970 if (getServiceStatus(svcID, &svcStatus) != SUCCESS) {
971 clu_svc_unlock(svcID);
972 clulog(LOG_ERR, "Failed getting status for service %s\n",
973 svcName);
974 return FAIL;
977 if (((svcStatus.sb_state != SVC_STARTED) ||
978 (svcStatus.sb_owner != myNodeID))
979 && (svcStatus.sb_state != SVC_PENDING)) {
980 clu_svc_unlock(svcID);
981 clulog(LOG_DEBUG, "Unable to stop service %s in %s state\n",
982 svcName, serviceStateStrings[svcStatus.sb_state]);
983 return SUCCESS;
986 svcStatus.sb_state = SVC_STOPPING;
987 svcStatus.sb_transition = (uint64_t)time(NULL);
988 if (setServiceStatus(&svcStatus) != SUCCESS) {
989 clu_svc_unlock(svcID);
990 clulog(LOG_ERR, "Failed changing service status\n");
991 return FAIL;
993 clu_svc_unlock(svcID);
995 if ((exec_service_script(SVC_STOP_STR, svcID, 1, &ret) != SUCCESS) ||
996 (ret)) {
997 return FAIL;
1000 if (flags & SVCF_PENDING)
1001 ret = SVC_PENDING;
1002 else
1003 ret = SVC_STOPPED;
1005 flip_state(svcName, svcID, ret, 1);
1007 return SUCCESS;
1012 * Stop a cluster service - without updating the on-disk-block.
1014 * @param svcID Service ID to stop.
1015 * @return FAIL, SUCCESS
1017 static int
1018 svc_stop_unclean(int svcID)
1020 int ret;
1021 char *svcName;
1024 * Infanticide.
1026 if (svc_children[svcID].cs_pid) {
1027 kill(svc_children[svcID].cs_pid, SIGKILL);
1029 do {
1030 if ((waitpid(svc_children[svcID].cs_pid, NULL, 0)==-1)
1031 && (errno == EINTR))
1032 continue;
1033 } while (0);
1036 getSvcName(svcID, &svcName);
1037 clulog(LOG_WARNING, "Forcing stop of service %s\n", svcName);
1039 if ((exec_service_script(SVC_STOP_STR, svcID, 1, &ret) != SUCCESS) ||
1040 (ret)) {
1041 clulog(LOG_EMERG,
1042 "Failed to stop service %s uncleanly - REBOOTING\n",
1043 svcName);
1044 sleep(1);
1045 REBOOT(RB_AUTOBOOT);
1047 return SUCCESS;
1052 * Disable a cluster service. Services in the disabled state are never
1053 * automatically started by the service manager - one must send a SVC_START
1054 * message.
1056 * @param svcID Service ID to stop.
1057 * @return FAIL, SUCCESS
1059 static int
1060 svc_disable(int svcID)
1062 ServiceBlock svcStatus;
1063 char *svcName;
1064 int ret;
1066 if (clu_svc_lock(svcID) == -1) {
1067 clulog(LOG_ERR, "Unable to obtain cluster lock: %s\n",
1068 strerror(errno));
1069 return FAIL;
1072 getSvcName(svcID, &svcName);
1073 clulog(LOG_DEBUG, "Handling disable request for service %s\n", svcName);
1075 if (getServiceStatus(svcID, &svcStatus) != SUCCESS) {
1076 clu_svc_unlock(svcID);
1077 clulog(LOG_ERR, "Failed getting status for service %s\n",
1078 svcName);
1079 return FAIL;
1082 if (svcStatus.sb_state == SVC_DISABLED) {
1083 clu_svc_unlock(svcID);
1084 clulog(LOG_DEBUG, "Service %s already disabled\n", svcName);
1085 return SUCCESS;
1088 if (((svcStatus.sb_state == SVC_STOPPING) &&
1089 (svcStatus.sb_owner != myNodeID)) &&
1090 (memb_online(membership, svcStatus.sb_owner)==1)) {
1091 clu_svc_unlock(svcID);
1092 clulog(LOG_WARNING,
1093 "Service %s is in stop-transition on node %d"
1094 ", cannot disable\n", svcName, svcStatus.sb_owner);
1095 return SUCCESS;
1098 if (((svcStatus.sb_state == SVC_STARTED) &&
1099 (svcStatus.sb_owner != myNodeID))
1100 || ((svcStatus.sb_state != SVC_STARTED)
1101 && (svcStatus.sb_state != SVC_STOPPING)
1102 && (svcStatus.sb_state != SVC_STOPPED)
1103 && (svcStatus.sb_state != SVC_PENDING)
1104 && (svcStatus.sb_state != SVC_FAILED))) {
1105 clu_svc_unlock(svcID);
1106 clulog(LOG_DEBUG, "Unable to disable service %s in %s state\n",
1107 svcName, serviceStateStrings[svcStatus.sb_state]);
1108 return FAIL;
1111 svcStatus.sb_state = SVC_STOPPING;
1112 svcStatus.sb_transition = (uint64_t)time(NULL);
1113 if (setServiceStatus(&svcStatus) != SUCCESS) {
1114 clu_svc_unlock(svcID);
1115 clulog(LOG_ERR, "Failed changing service status\n");
1116 return FAIL;
1118 clu_svc_unlock(svcID);
1120 if ((exec_service_script(SVC_STOP_STR, svcID, 1, &ret) != SUCCESS) ||
1121 (ret)) {
1122 return FAIL;
1125 flip_state(svcName, svcID, SVC_DISABLED, 1);
1127 return SUCCESS;
1132 * Mark a cluster service as failed. User intervention required.
1134 * @param svcID Service ID to stop.
1135 * @return FAIL, SUCCESS
1137 static int
1138 _svc_fail(int svcID)
1140 ServiceBlock svcStatus;
1141 char *svcName;
1143 if (clu_svc_lock(svcID) == -1) {
1144 clulog(LOG_ERR, "Unable to obtain cluster lock: %s\n",
1145 strerror(errno));
1146 return FAIL;
1149 getSvcName(svcID, &svcName);
1150 clulog(LOG_DEBUG, "Handling failure request for service %s\n", svcName);
1152 if (getServiceStatus(svcID, &svcStatus) != SUCCESS) {
1153 clu_svc_unlock(svcID);
1154 clulog(LOG_ERR, "Failed getting status for service %s\n",
1155 svcName);
1156 return FAIL;
1159 if ((svcStatus.sb_state == SVC_STARTED) &&
1160 (svcStatus.sb_owner != myNodeID)) {
1161 clu_svc_unlock(svcID);
1162 clulog(LOG_DEBUG, "Unable to mark service %s as failed in %s state\n",
1163 svcName, serviceStateStrings[svcStatus.sb_state]);
1164 return FAIL;
1168 * Leave a bread crumb so we can debug the problem with the service!
1170 if (svcStatus.sb_owner != NODE_ID_NONE) {
1171 svcStatus.sb_last_owner = svcStatus.sb_owner;
1172 svcStatus.sb_owner = NODE_ID_NONE;
1174 svcStatus.sb_state = SVC_FAILED;
1175 svcStatus.sb_transition = (uint64_t)time(NULL);
1176 svcStatus.sb_restarts = 0;
1177 if (setServiceStatus(&svcStatus) != SUCCESS) {
1178 clu_svc_unlock(svcID);
1179 clulog(LOG_ERR, "Failed changing service status\n");
1180 return FAIL;
1182 clu_svc_unlock(svcID);
1184 return SUCCESS;
1189 * Check the status of a given service. This execs the service script
1190 * with the argument 'status', and evaluates the return code.
1192 * @param svcID Service ID to check.
1193 * @return FAIL or SUCCESS.
1195 static int
1196 svc_check(int svcID)
1198 ServiceBlock svcStatus;
1199 char *svcName, *maxrestarts, *maxfs;
1200 int script_ret, ret;
1202 getSvcName(svcID, &svcName);
1203 clulog(LOG_DEBUG, "Handling check request for service %s\n", svcName);
1205 if (clu_svc_lock(svcID) == -1) {
1206 clulog(LOG_ERR, "Unable to obtain cluster lock: %s\n",
1207 strerror(errno));
1208 return FAIL;
1211 if (getServiceStatus(svcID, &svcStatus) != SUCCESS) {
1212 clu_svc_unlock(svcID);
1213 clulog(LOG_ERR, "Failed getting status for service %s\n",
1214 svcName);
1215 return FAIL;
1218 if ((svcStatus.sb_state != SVC_STARTED) ||
1219 (svcStatus.sb_owner != myNodeID)) {
1220 clu_svc_unlock(svcID);
1221 clulog(LOG_DEBUG, "Unable to check service %s in %s state\n",
1222 svcName, serviceStateStrings[svcStatus.sb_state]);
1223 return FAIL;
1225 clu_svc_unlock(svcID);
1227 if ((exec_service_script(SVC_CHECK_STR, svcID, 1, &ret) != SUCCESS) ||
1228 (ret))
1229 script_ret = FAIL;
1230 else
1231 script_ret = SUCCESS;
1233 clu_svc_lock(svcID);
1234 if (getServiceStatus(svcID, &svcStatus) != SUCCESS) {
1235 clu_svc_unlock(svcID);
1236 clulog(LOG_ERR, "Failed getting status for service %s\n",
1237 svcName);
1238 return FAIL;
1241 if (script_ret == FAIL) {
1242 ret = FAIL;
1244 if (svcStatus.sb_checks == 0 &&
1245 (getSvcMaxFalseStarts(svcID, &maxfs) == SUCCESS) &&
1246 atoi(maxfs) > 0) {
1248 /* If we've exceeded false-start count, relocate */
1249 svcStatus.sb_false_starts++;
1250 clulog(LOG_WARNING,
1251 "Service %s false-start detected (%d/%d)\n",
1252 svcName, svcStatus.sb_false_starts, atoi(maxfs));
1254 if (svcStatus.sb_false_starts > atoi(maxfs)) {
1255 clulog(LOG_ERR, "Max false starts for service %s"
1256 " exceeded. Relocating\n", svcName);
1257 ret = ABORT;
1260 /* Update on-disk with new false start info */
1261 setServiceStatus(&svcStatus);
1264 if (getSvcMaxRestarts(svcID, &maxrestarts) == SUCCESS) {
1265 if (atoi(maxrestarts) > 0) {
1266 /* We're about to restart. If we would exceed
1267 our restart count, relocate. */
1268 if (svcStatus.sb_restarts >=
1269 atoi(maxrestarts)) {
1270 clulog(LOG_ERR, "Max restarts for "
1271 "service %s exceeded. "
1272 "Relocating\n", svcName);
1273 ret = ABORT;
1275 } else if (atoi(maxrestarts) < 0) {
1276 clulog(LOG_ERR, "Service %s failed. "
1277 "Relocating\n", svcName);
1278 ret = ABORT;
1281 } else { /* SUCCESS */
1282 ret = SUCCESS;
1283 if (!svcStatus.sb_checks) {
1284 svcStatus.sb_checks = 1;
1285 svcStatus.sb_false_starts = 0;
1286 setServiceStatus(&svcStatus);
1290 clu_svc_unlock(svcID);
1292 return ret;
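/**
 * Bring all services to a known state at member startup. Any service we
 * owned when we last went down, or that was left PENDING with no owner,
 * is marked STOPPED on disk; the stop script is then run for every
 * service to clean up state from an ungraceful shutdown.
 */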
1296 static int
1297 init_services(void)
1299 int svcID;
1300 ServiceBlock svcStatus;
1301 char *svcName;
1303 clulog(LOG_INFO, "Initializing services\n");
1305 for (svcID = 0; svcID < MAX_SERVICES; svcID++) {
1307 /* This takes a long time... Abort quickly if necessary */
1308 if (sigterm_received)
1309 svcmgr_exit(0, 1);
1311 if (serviceExists(svcID) != YES)
1312 continue;
1314 getSvcName(svcID, &svcName);
1317 * If service is not on the shared service information disk,
1318 * or it is running and owned by this node, reinitialize it.
1321 if (clu_svc_lock(svcID) == 0) {
1322 if ((getServiceStatus(svcID, &svcStatus) != SUCCESS) ||
1323 ((svcStatus.sb_owner == myNodeID) &&
1324 ((svcStatus.sb_state == SVC_STARTED) ||
1325 (svcStatus.sb_state == SVC_STOPPING))) ||
1326 ((svcStatus.sb_owner == NODE_ID_NONE) &&
1327 (svcStatus.sb_state == SVC_PENDING))) {
1328 svcStatus.sb_id = svcID;
1329 svcStatus.sb_last_owner = svcStatus.sb_owner;
1330 svcStatus.sb_owner = NODE_ID_NONE;
1331 svcStatus.sb_state = SVC_STOPPED;
1332 svcStatus.sb_transition = (uint64_t)time(NULL);
1333 svcStatus.sb_restarts = 0;
1334 if (setServiceStatus(&svcStatus) != SUCCESS) {
1335 clulog(LOG_ERR, "Failed setting "
1336 "service status for %s\n",
1337 svcName);
1340 clu_svc_unlock(svcID);
1341 } else {
1342 clulog(LOG_WARNING,
1343 "Unable to obtain lock for service %s: %s\n",
1344 svcName,
1345 strerror(errno));
1349 * We stop all services to clean up any state in the case
1350 * that this system came down without gracefully stopping
1351 * services.
1353 if (exec_service_script(SVC_STOP_STR, svcID, 1, NULL) !=
1354 SUCCESS) {
1355 clulog(LOG_ALERT,
1356 "Failed stopping service %s during init\n",
1357 svcName);
1358 continue;
1363 return SUCCESS;
1368 * Send a message to the target node to start the service.
1370 static int
1371 relocate_service(int svcID, int request, int target)
1373 SmMessageSt msg_relo;
1374 int fd_relo, msg_ret;
1376 /* Build the message header */
1377 msg_relo.sm_hdr.gh_magic = GENERIC_HDR_MAGIC;
1378 msg_relo.sm_hdr.gh_command = SVC_ACTION_REQUEST;
1379 msg_relo.sm_hdr.gh_length = sizeof (SmMessageSt);
1380 msg_relo.sm_data.d_action = request;
1381 msg_relo.sm_data.d_svcID = svcID;
1382 msg_relo.sm_data.d_ret = 0;
1384 /* Open a connection to the other node */
1386 if ((fd_relo = msg_open(PROCID_CLUSVCMGRD, target)) < 0) {
1387 clulog(LOG_ERR, "Failed opening connection to member #%d\n",
1388 target);
1389 return FAIL;
1392 /* Encode */
1393 swab_SmMessageSt(&msg_relo);
1395 /* Send relocate message to the other node */
1396 if (msg_send(fd_relo, &msg_relo, sizeof (SmMessageSt)) !=
1397 sizeof (SmMessageSt)) {
1398 clulog(LOG_ERR,
1399 "Error sending relocate request to member #%d\n",
1400 target);
1401 msg_close(fd_relo);
1402 return FAIL;
1405 clulog(LOG_DEBUG, "Sent relocate request.\n");
1407 /* Check the response */
1408 msg_ret = msg_receive(fd_relo, &msg_relo, sizeof (SmMessageSt));
1410 if (msg_ret != sizeof (SmMessageSt)) {
1412 * In this case, we don't restart the service, because the
1413 * service state is actually unknown to us at this time.
1415 clulog(LOG_ERR, "Mangled reply from member #%d during service "
1416 "relocate\n", target);
1417 msg_close(fd_relo);
1418 return SUCCESS; /* XXX really UNKNOWN */
1421 /* Got a valid response from other node. */
1422 msg_close(fd_relo);
1424 /* Decode */
1425 swab_SmMessageSt(&msg_relo);
1427 return msg_relo.sm_data.d_ret;
1432 * Advise whether or not we should drop a particular request for a given
1433 * service.
1435 * @param svcID Service ID in question.
1436 * @param req Particular request in question.
1437 * @param svcStatus Current service status block.
1438 * @return 1 for TRUE (drop service request), 0 for FALSE (do not
1439 * drop given request)
1442 svc_advise_drop_request(int svcID, int req, ServiceBlock * svcStatus)
1445 * Drop the request if the service is in the PENDING state (ie, it
1446 * failed on one node) and it is neither a START_PENDING nor a DISABLE.
1448 if ((svcStatus->sb_state == SVC_PENDING) &&
1449 ((req != SVC_START_PENDING) && (req != SVC_DISABLE))) {
1450 clulog(LOG_DEBUG,
1451 "Dropping op %d for svc%d: Service Pending Start\n",
1452 req, svcID);
1453 return 1;
1457 * Drop the request if it's an SVC_CHECK and the service isn't started.
1459 if ((req == SVC_CHECK) &&
1460 ((svcStatus->sb_state != SVC_STARTED) ||
1461 (svcStatus->sb_owner != myNodeID))) {
1462 clulog(LOG_DEBUG, "Dropping SVC_CHECK for svc%d: Service "
1463 "not running locally\n", svcID);
1464 return 1;
1468 * Drop the request if it's an SVC_CHECK and we're already doing
1469 * something to that service so that other requests may continue.
1471 if ((req == SVC_CHECK) && svc_children[svcID].cs_pid) {
1472 clulog(LOG_DEBUG,
1473 "Dropping SVC_CHECK for svc%d: PID%d has not completed",
1474 svcID, svc_children[svcID].cs_pid);
1475 return 1;
1479 * Drop the request if it's an SVC_START and a child process is
1480 * still operating on the service
1482 if ((req == SVC_START) && svc_children[svcID].cs_pid) {
1483 clulog(LOG_DEBUG,
1484 "Dropping SVC_START for svc%d: PID%d has not completed",
1485 svcID, svc_children[svcID].cs_pid);
1486 return 1;
1489 return 0;
1494 * Determine the target node we should relocate the service to if we are
1495 * not given one from cluadmin. This checks the failover domain to see
1496 * the next node online in a given failover group.
1498 * @param rmask The nodes allowed to be checked for when we are
1499 * trying to determine who should start the service.
1500 * @param current_owner The current owner of the service, or the node
1501 * who is requesting the information. This is the
1502 * _last_ member allowed to run the service.
1503 * @param svcID ID of the service in question.
1506 best_target_node(memb_mask_t rmask, int current_owner, int svcID)
1508 int x;
1510 x = current_owner + 1;
1511 if (x >= MAX_NODES)
1512 x = 0;
1514 do {
1515 if (node_should_start(x, rmask, svcID) == FOD_BEST) {
1516 return x;
1519 x++;
1520 if (x >= MAX_NODES)
1521 x = 0;
1522 } while (x != current_owner);
1524 return current_owner;
1528 #if 0
1530 * clear_failure_mask(int svcID)
1532 * @param svcID
1533 * @see mark_self_failed
1536 clear_failure_mask(int svcID)
1538 ServiceBlock svcStatus;
1540 if (clu_svc_lock(svcID) == -1) {
1541 clulog(LOG_ERR, "Couldn't obtain lock for service %d: %s\n",
1542 svcID, strerror(errno));
1543 return FAIL;
1546 if (getServiceStatus(svcID, &svcStatus) != SUCCESS) {
1547 clulog(LOG_ERR, "Couldn't obtain status for service %d\n",
1548 svcID);
1549 clu_svc_unlock(svcID);
1550 return FAIL;
1553 memset(svcStatus.sb_failed_mask, 0, sizeof(svcStatus.sb_failed_mask));
1554 if (setServiceStatus(&svcStatus) != SUCCESS) {
1555 clulog(LOG_ERR, "Couldn't set FAILURE status for service %d\n",
1556 svcID);
1557 return FAIL;
1560 clu_svc_unlock(svcID);
1561 return SUCCESS;
1565 * Marks our bit in the failed_nodes bitmask in the service block on disk.
1566 * This is a signal to other members to _not_ send us the service again.
1567 * This mask is cleared when a service is successfully started.
1569 * @param svcID
1570 * @see clear_failure_mask
1573 mark_self_failed(int svcID)
1575 ServiceBlock svcStatus;
1577 if (clu_svc_lock(svcID) == -1) {
1578 clulog(LOG_ERR, "Couldn't obtain lock for service %d: %s\n",
1579 svcID, strerror(errno));
1580 return FAIL;
1583 if (getServiceStatus(svcID, &svcStatus) != SUCCESS) {
1584 clulog(LOG_ERR, "Couldn't obtain status for service %d\n",
1585 svcID);
1586 clu_svc_unlock(svcID);
1587 return FAIL;
1590 /* Mark ourselves as FAILED for this service */
1591 memb_mark_up(svcStatus.sb_failed_mask, myNodeID);
1593 if (setServiceStatus(&svcStatus) != SUCCESS) {
1594 clulog(LOG_ERR, "Couldn't set FAILURE status for service %d\n",
1595 svcID);
1596 return FAIL;
1599 clu_svc_unlock(svcID);
1601 return SUCCESS;
1603 #endif
1607 svc_report_failure(int svcID)
1609 ServiceBlock svcStatus;
1610 char *svcName;
1611 char *nodeName;
1613 getSvcName(svcID, &svcName);
1615 if (clu_svc_lock(svcID) == -1) {
1616 clulog(LOG_ERR, "Couldn't obtain lock for service %s: %s\n",
1617 svcName, strerror(errno));
1618 return FAIL;
1621 if (getServiceStatus(svcID, &svcStatus) != SUCCESS) {
1622 clulog(LOG_ERR, "Couldn't obtain status for service %s\n",
1623 svcName);
1624 clu_svc_unlock(svcID);
1625 return FAIL;
1627 clu_svc_unlock(svcID);
1629 getNodeName(svcStatus.sb_last_owner, &nodeName);
1631 clulog(LOG_ALERT,
1632 "Service %s returned failure code. Last Owner: %s\n",
1633 svcName, nodeName);
1634 clulog(LOG_ALERT,
1635 "Administrator intervention required.\n");
1638 return SUCCESS;
1642 * handle_relocate_req - Relocate a service. This seems like a huge
1643 * deal, except it really isn't.
1645 * @param svcID Service ID in question.
1646 * @param flags If (flags & SVCF_PENDING), we were called from
1647 * handle_start_req - and so we should ignore all local
1648 * restarts/stops - since handle_start_req does this
1649 * for us.
1650 * @param preferred_target When sent a relocate message from the
1651 * management software, a destination node
1652 * is sent as well. This causes us to try
1653 * starting the service on that node *first*,
1654 * but does NOT GUARANTEE that the service
1655 * will end up on that node. It will end up
1656 * on whatever node actually successfully
1657 * starts it.
1658 * @param new_owner Member who actually ends up owning the service.
1661 handle_relocate_req(int svcID, int flags, int preferred_target,
1662 uint32_t *new_owner)
1664 memb_mask_t allowed_nodes;
1665 int target = preferred_target;
1666 int request;
1667 char *nodeName=NULL, *svcName=NULL;
1669 getSvcName(svcID, &svcName);
1670 request = (flags & SVCF_PENDING) ? SVC_START_PENDING :
1671 SVC_START_RELOCATE;
1674 * Stop the service - if we haven't already done so.
1676 if (!(flags & SVCF_PENDING)) {
1677 if (svc_stop(svcID, flags) != SUCCESS) {
1678 if (svc_start(svcID, flags) != SUCCESS)
1679 svc_fail(svcID);
1680 return FAIL;
1685 * First, see if it's legal to relocate to the target node. Legal
1686 * means: the node is online and is in the [restricted] failover
1687 * domain of the service, or the service has no failover domain.
1689 if (preferred_target >= 0 && preferred_target <= MAX_NODES) {
1691 memset(allowed_nodes, 0, sizeof(allowed_nodes));
1692 memb_mark_up(allowed_nodes, preferred_target);
1693 target = best_target_node(allowed_nodes, myNodeID, svcID);
1696 * I am the ONLY one capable of running this service,
1697 * PERIOD...
1699 if (target == myNodeID)
1700 goto exhausted;
1702 if (target == preferred_target) {
1704 * It's legal to start the service on the given
1705 * node. Try to do so.
1707 if (relocate_service(svcID, request, target) ==
1708 SUCCESS) {
1709 *new_owner = target;
1711 * Great! We're done...
1713 return SUCCESS;
1719 * Ok, so, we failed to send it to the preferred target node.
1720 * Try to start it on all other nodes.
1722 memcpy(allowed_nodes, membership, sizeof(memb_mask_t));
1723 memb_mark_down(allowed_nodes, myNodeID);
1725 /* Don't try to relocate to the preferred target more than once. */
1726 if (preferred_target >= 0 && preferred_target <= MAX_NODES)
1727 memb_mark_down(allowed_nodes, preferred_target);
1729 while (memb_count(allowed_nodes)) {
1730 target = best_target_node(allowed_nodes, myNodeID, svcID);
1731 if (target == myNodeID)
1732 goto exhausted;
1734 switch (relocate_service(svcID, request, target)) {
1735 case FAIL:
1736 memb_mark_down(allowed_nodes, target);
1737 continue;
1738 case ABORT:
1739 svc_report_failure(svcID);
1740 return FAIL;
1741 case SUCCESS:
1742 *new_owner = target;
1743 getNodeName(target, &nodeName);
1744 clulog(LOG_NOTICE,
1745 "Service %s now running on member %s\n",
1746 svcName, nodeName);
1747 return SUCCESS;
1748 default:
1749 clulog(LOG_ERR, "Invalid reply from member %d during"
1750 " relocate operation!\n", target);
1755 * We got sent here from handle_start_req.
1756 * We're DONE.
1758 if (flags & SVCF_PENDING)
1759 return FAIL;
1762 * All potential places for the service to start have been exhausted.
1763 * We're done.
1765 exhausted:
1766 clulog(LOG_WARNING, "Attempting to restart service %s locally.\n",
1767 svcName);
1768 if (svc_start(svcID, flags) == SUCCESS) {
1769 *new_owner = myNodeID;
1770 return FAIL;
1773 if (svc_stop(svcID, 0) != SUCCESS) {
1774 svc_fail(svcID);
1775 svc_report_failure(svcID);
1778 return FAIL;
1783 * handle_start_req - Handle a generic start request from a user or during
1784 * service manager boot.
1786 * @param svcID Service ID to start.
1787 * @param flags
1788 * @param new_owner Owner which actually started the service.
1789 * @return FAIL - Failure.
1790 * SUCCESS - The service is running.
1793 handle_start_req(int svcID, int flags, uint32_t *new_owner)
1795 int ret, tolerance = FOD_BEST, target = -1;
1798 * When a service request is from a user application (eg, clusvcadm),
1799 * accept FOD_GOOD instead of FOD_BEST
1801 if (flags & SVCF_START_DISABLED)
1802 tolerance = FOD_GOOD;
1804 if (!(flags & SVCF_RESTART) &&
1805 (node_should_start(myNodeID, membership, svcID) < tolerance)) {
1807 /* Try to send to someone else who might care about it */
1808 target = best_target_node(membership, myNodeID, svcID);
1809 ret = handle_relocate_req(svcID, SVCF_PENDING, target,
1810 new_owner);
1812 if (ret == FAIL)
1813 svc_disable(svcID);
1814 return ret;
1818 * Strip out all flags which are invalid.
1820 clulog(LOG_DEBUG, "Starting service %d - flags 0x%08x\n", svcID,
1821 flags);
1823 #if 0
1825 * This is a 'root' start request. We need to clear out our failure
1826 * mask here - so that we can try all nodes if necessary.
1828 flags |= SVCF_CLEAR_FAILURES;
1829 #endif
1830 ret = svc_start(svcID, flags);
1832 #if 0
1833 if (clear_failure_mask(svcID) != SUCCESS) {
1834 clulog(LOG_WARNING, "Could not clear failure bitmask for "
1835 "service #%s!\n", svcName);
1837 #endif
1840 * If we succeeded, then we're done.
1842 if (ret == SUCCESS) {
1843 *new_owner = myNodeID;
1844 return SUCCESS;
1848 * Keep the state open so the other nodes don't try to start
1849 * it. This allows us to be the 'root' of a given service.
1851 clulog(LOG_DEBUG, "Stopping failed service %d\n", svcID);
1852 if (svc_stop(svcID, SVCF_PENDING) != SUCCESS) {
1853 clulog(LOG_CRIT, "Service %d failed to stop cleanly", svcID);
1854 svc_fail(svcID);
1857 * If we failed to stop the service, we're done. At this
1858 * point, we can't determine the service's status - so
1859 * trying to start it on other nodes is right out.
1861 return ABORT;
1865 * OK, it failed to start - but succeeded to stop. Now,
1866 * we should relocate the service.
1868 clulog(LOG_WARNING, "Relocating failed service %d\n", svcID);
1869 ret = handle_relocate_req(svcID, SVCF_PENDING, -1, new_owner);
1871 if (ret == FAIL)
1872 svc_disable(svcID);
1874 return ret;
1879 * handle_start_remote_req - Handle a remote start request.
1881 * @param svcID Service ID to start.
1882 * @param flags Flags to use to determine start behavior.
1883 * @return FAIL - Local failure. ABORT - Unrecoverable error:
1884 * the service didn't start, nor stop cleanly. SUCCESS
1885 * - We started the service.
1888 handle_start_remote_req(int svcID, int flags)
1890 memb_mask_t rmask;
1891 int tolerance = FOD_BEST;
1893 memset(rmask, 0, sizeof(rmask));
1894 memb_mark_up(rmask, myNodeID);
1896 if (flags & SVCF_START_DISABLED)
1897 tolerance = FOD_GOOD;
1900 * See if we agree with our ability to start the given service.
1902 if (node_should_start(myNodeID, rmask, svcID) < tolerance)
1903 return FAIL;
1905 if (svc_start(svcID, flags) == SUCCESS)
1906 return SUCCESS;
1908 #if 0
1909 if (mark_self_failed(svcID) == FAIL) {
1910 svc_fail(svcID);
1911 return ABORT;
1913 #endif
1915 if (svc_stop(svcID, 0) == SUCCESS)
1916 return FAIL;
1918 svc_fail(svcID);
1919 return ABORT;
1924 * Handle a request regarding a service.
1926 * @param svcID ID of service in question.
1927 * @param action Action to be performed on the service.
1928 * @param target In the case of a relocate, target/destination node
1929 * we're relocating to.
1930 * @param fd File descriptor on which we send our response.
1932 void
1933 handle_svc_request(int svcID, int action, int target, msg_handle_t fd)
1935 char *svcName;
1936 SmMessageSt msg_sm;
1937 int ret = FAIL;
1938 ServiceBlock svcStatus;
1939 int flags = 0;
1940 uint32_t new_owner = NODE_ID_NONE;
1941 char child = 0;
1943 getSvcName(svcID, &svcName);
1944 clulog(LOG_DEBUG, "Service %s request %d\n", svcName, action);
1946 if (myNodeState != NODE_UP)
1947 goto out;
1950 * Don't assume the service exists...
1952 if (serviceExists(svcID) != YES) {
1953 goto out;
1956 if (clu_svc_lock(svcID) == -1) {
1957 clulog(LOG_ERR, "Unable to obtain cluster lock: %s\n",
1958 strerror(errno));
1959 goto out;
1962 if (getServiceStatus(svcID, &svcStatus) != SUCCESS) {
1963 clulog(LOG_ERR, "Cannot get status for service %d\n", svcID);
1964 clu_svc_unlock(svcID);
1965 goto out;
1968 clu_svc_unlock(svcID);
1971 * Check to see if we should drop the service request. This
1972 * is based on the current service status, the action required,
1973 * etc.
1975 if (svc_advise_drop_request(svcID, action, &svcStatus)) {
1976 ret = SUCCESS;
1977 goto out;
1981 * Fork so that we can run service actions in parallel.
1983 while (svc_children[svcID].cs_pid != 0) {
1985 if (svcStatus.sb_state == SVC_PENDING) {
1987 * Shouldn't get here, but if so, avoid deadlock.
1989 clulog(LOG_ERR, "%s failed during "
1990 "relocate-to-preferred member operation",
1991 svcName);
1992 ret = FAIL;
1993 goto out;
1996 clulog(LOG_DEBUG,
1997 "Proc %d already running action on service %s\n",
1998 svc_children[svcID].cs_pid, svcName);
2000 /* See if we missed the SIGCHLD */
2001 if (!cleanup_child(svcID))
2002 sleep(5);
2005 /* Record what the child will be doing */
2006 svc_children[svcID].cs_rq = action;
2008 if ((svc_children[svcID].cs_pid = fork())) {
2009 if (svc_children[svcID].cs_pid < 0) {
2011 * Fork failed.
2013 clulog(LOG_DEBUG,
2014 "Fork failed handling action request.\n");
2015 svc_children[svcID].cs_pid = 0;
2016 svc_children[svcID].cs_rq = 0;
2018 /* Send reply, if applicable */
2019 goto out;
2022 clulog(LOG_DEBUG, "[M] Pid %d -> %s for service %s\n",
2023 svc_children[svcID].cs_pid, serviceActionStrings[action],
2024 svcName);
2026 return;
2029 block_signal(SIGTERM);
2030 block_signal(SIGHUP);
2031 child = 1;
2032 clulog(LOG_DEBUG, "[C] Pid %d handling %s request for service %s\n",
2033 getpid(), serviceActionStrings[action], svcName);
2035 switch (action) {
2036 case SVC_START:
2037 start_top:
2038 flags |= ((fd == -1) ? 0 : SVCF_START_DISABLED);
2040 ret = handle_start_req(svcID, flags, &new_owner);
2041 break;
2043 case SVC_START_PENDING:
2045 * We allow starting of pending requests only if
2046 * explicitly asked for from someone else - never on
2047 * a local node event.
2049 flags = SVCF_PENDING;
2050 case SVC_START_RELOCATE:
2052 * We use fd as an indicator to see whether or not we
2053 * were called on behalf of a node event. Generally,
2054 * fd is set, but we usually don't handle relocation of
2055 * disabled services -- it's kind of an anomaly.
2057 flags |= (fd == -1) ? 0 : SVCF_START_DISABLED;
2058 ret = handle_start_remote_req(svcID, flags);
2059 break;
2061 case SVC_STOP:
2062 case SVC_RESTART:
2063 restart_top:
2064 if ((ret = svc_stop(svcID, 0)) == SUCCESS) {
2066 * Ok, we did the stop - now do the whole start
2067 * process, including relocating in the case of
2068 * failure.
2070 if (action == SVC_RESTART) {
2071 target = myNodeID;
2072 flags = SVCF_RESTART;
2073 goto start_top;
2076 break;
2079 ret = FAIL;
2081 if (svc_start(svcID, 0) == SUCCESS)
2082 break;
2084 svc_fail(svcID);
2085 break;
2087 case SVC_DISABLE:
2089 if ((ret = svc_disable(svcID)) == SUCCESS)
2090 break;
2093 * We don't run svc_fail here because svc_fail could
2094 * put us back where we were. Always allow disable.
2096 ret = FAIL;
2097 break;
2099 case SVC_CHECK:
2101 if ((ret = svc_check(svcID)) == SUCCESS)
2102 break;
2104 if (ret == ABORT) {
2105 /* Try to relocate service at this point */
2106 ret = handle_relocate_req(svcID, 0, -1, &new_owner);
2107 break;
2110 ret = FAIL;
2112 clulog(LOG_WARNING, "Restarting locally failed service %s\n",
2113 svcName);
2115 (void) svc_stop(svcID, 0);
2118 * Try the whole start process, including relocating it in
2119 * the case that it failed to restart locally.
2121 flags = SVCF_RESTART | SVCF_RESTARTFAILED;
2122 goto start_top;
2124 case SVC_RELOCATE:
2126 if (svcStatus.sb_state == SVC_DISABLED) {
2127 clulog(LOG_DEBUG,
2128 "Can not relocate disabled service %s\n",
2129 svcName);
2130 ret = FAIL;
2131 break;
2134 if (target == myNodeID)
2135 goto restart_top;
2137 ret = handle_relocate_req(svcID, 0, target, &new_owner);
2138 break;
2140 default:
2141 clulog(LOG_ERR, "Invalid service request %d\n", action);
2142 ret = FAIL;
2143 break;
2147 * If fd is valid, the request was on behalf of a client who is
2148 * blocking for the status reply.
2150 out:
2151 if (fd != -1) {
2152 msg_sm.sm_data.d_svcOwner = new_owner;
2153 msg_sm.sm_data.d_ret = ret;
2155 /* Encode before responding... */
2156 swab_SmMessageSt(&msg_sm);
2158 if (msg_send(fd, &msg_sm, sizeof (SmMessageSt)) !=
2159 sizeof (SmMessageSt)) {
2160 clulog(LOG_ERR, "Error replying to action request.\n");
2164 if (child)
2165 exit(ret); /* child exit */
2170 * Check to see if we need to kill a child process - and do so if necessary.
2171 * We do not need to reset the cs_pid field. This should only be called
2172 * during a remote node-down event to determine if we had a relocate-request
2173 * or other request out to that node. If so, we need to kill the child
2174 * handling that request.
2176 * @param svcID Service ID
2177 * @param svc Service block (status of svcID)
2179 void
2180 consider_reapage(int svcID, ServiceBlock * svc)
2183 * Since PENDING is only a valid state when BOTH nodes are up, and
2184 * given that the remote node just died, mark the service as
2185 * 'stopped' if it was in the 'pending' state. Kill the child
2186 * process if it exists.
2188 if (svc->sb_state == SVC_PENDING) {
2189 if (svc_children[svcID].cs_pid)
2190 kill(svc_children[svcID].cs_pid, SIGKILL);
2192 clulog(LOG_DEBUG, "Marking %d (state %d) as stopped", svcID,
2193 svc->sb_state);
2195 /* Mark state -> stopped */
2196 if (clu_svc_lock(svcID) == -1) {
2197 clulog(LOG_ERR, "Unable to obtain cluster lock: %s\n",
2198 strerror(errno));
2199 return;
2202 if (getServiceStatus(svcID, svc) == SUCCESS) {
2203 svc->sb_last_owner = svc->sb_owner;
2204 svc->sb_owner = NODE_ID_NONE;
2205 svc->sb_state = SVC_STOPPED;
2206 if (setServiceStatus(svc) != SUCCESS) {
2207 clulog(LOG_ERR,
2208 "Failed marking service %d as stopped\n",
2209 svcID);
2212 clu_svc_unlock(svcID);
2213 return;
2216 if (!svc_children[svcID].cs_pid)
2217 return;
2220 * The child was SVC_START and the other node is marked as the owner.
2221 * This means we tried to start it locally, failed, and sent a
2222 * REMOTE_START to the other node, but the other node died before we
2223 * received a response.
2225 * Simplify: Kill child whenever our partner owns the service.
2227 if (svc->sb_owner != myNodeID) {
2228 clulog(LOG_INFO,
2229 "Killing child PID%d: Remote member went down!",
2230 svc_children[svcID].cs_pid);
2231 kill(svc_children[svcID].cs_pid, SIGKILL);
2232 return;
2236 * Our last case is an explicit relocate (eg, from cluadmin). The other
2237 * node went down, and we received its node-down event. This could have
2238 * been taken care of above, but we still need to catch the cases where
2239 * it hasn't been taken care of yet...
2241 if (svc_children[svcID].cs_rq == SVC_RELOCATE) {
2242 clulog(LOG_INFO,
2243 "Killing child PID%d: Remote member went down!",
2244 svc_children[svcID].cs_pid);
2245 kill(svc_children[svcID].cs_pid, SIGKILL);
2251 * Rewrite a service block as 'stopped' if all members of its
2252 * restricted failover domain went offline.
2254 * @param svcID Service ID to stop.
2255 * @return FAIL, SUCCESS
2257 static int
2258 check_rdomain_crash(int svcID, ServiceBlock *svcStatus)
2260 char *svcName;
2261 memb_mask_t allowed_nodes;
2262 getSvcName(svcID, &svcName);
2263 if (memb_online(membership, svcStatus->sb_owner) ||
2264 (svcStatus->sb_state == SVC_STOPPED))
2265 return SUCCESS;
2267 memcpy(allowed_nodes, membership, sizeof(memb_mask_t));
2268 memb_mark_down(allowed_nodes, svcStatus->sb_owner);
2269 if (best_target_node(allowed_nodes, svcStatus->sb_owner, svcID) !=
2270 svcStatus->sb_owner)
2271 return SUCCESS;
2273 if (clu_svc_lock(svcID) == -1) {
2274 clulog(LOG_ERR, "Unable to obtain cluster lock: %s\n",
2275 strerror(errno));
2276 return FAIL;
2279 if (getServiceStatus(svcID, svcStatus) != SUCCESS) {
2280 clu_svc_unlock(svcID);
2281 clulog(LOG_ERR, "Failed getting status for service %s\n",
2282 svcName);
2283 return FAIL;
2286 if ((svcStatus->sb_state != SVC_STARTED) ||
2287 (svcStatus->sb_owner == myNodeID) ||
2288 memb_online(membership, svcStatus->sb_owner)) {
2289 clu_svc_unlock(svcID);
2290 return SUCCESS;
2293 svcStatus->sb_last_owner = svcStatus->sb_owner;
2294 svcStatus->sb_owner = NODE_ID_NONE;
2295 svcStatus->sb_state = SVC_STOPPED;
2296 svcStatus->sb_transition = (uint64_t)time(NULL);
2297 if (setServiceStatus(svcStatus) != SUCCESS) {
2298 clu_svc_unlock(svcID);
2299 clulog(LOG_ERR, "Failed changing service status\n");
2300 return FAIL;
2302 clu_svc_unlock(svcID);
2303 return SUCCESS;
2308 * Called to decide what services to start locally during a node_event.
2309 * Originally a part of node_event, it is now its own function to cut down
2310 * on the length of node_event.
2312 * @see node_event
2314 void
2315 eval_services(int local, int nodeStatus)
2317 int svcID;
2318 char *svcName, *nodeName;
2319 ServiceBlock svcStatus;
2321 if (services_locked)
2322 return;
2324 for (svcID = 0; svcID < MAX_SERVICES; svcID++) {
2326 if (serviceExists(svcID) != YES)
2327 continue;
2329 getSvcName(svcID, &svcName);
2332 * Lock the service information and get the current service
2333 * status.
2335 if (clu_svc_lock(svcID) == -1) {
2336 clulog(LOG_ERR, "Unable to obtain cluster lock: %s\n",
2337 strerror(errno));
2338 return;
2341 if (getServiceStatus(svcID, &svcStatus) != SUCCESS) {
2342 clulog(LOG_ERR, "Cannot get status for service %s\n",
2343 svcName);
2344 clu_svc_unlock(svcID);
2345 continue;
2347 clu_svc_unlock(svcID);
2349 if (svcStatus.sb_owner == NODE_ID_NONE)
2350 nodeName = "none";
2351 else
2352 getNodeName(svcStatus.sb_owner, &nodeName);
2354 if ((svcStatus.sb_state == SVC_DISABLED) ||
2355 (svcStatus.sb_state == SVC_FAILED))
2356 continue;
2358 clulog(LOG_DEBUG, "Evaluating service %s, state %s, owner "
2359 "%s\n", svcName,
2360 serviceStateStrings[svcStatus.sb_state], nodeName);
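/*
 * Roughly, the two cases handled below are:
 *  - the local member just came up: start whatever we are the best
 *    failover-domain candidate for;
 *  - a remote member went down: reap any child working on one of its
 *    services, take the service over if we are the best candidate, or
 *    mark it stopped if its restricted domain died with it.
 */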
2362 if (local && (nodeStatus == NODE_UP)) {
2364 /*
2365 * Start any stopped services, or started services
2366 * that are owned by a down node.
2367 */
2368 if (node_should_start(myNodeID, membership, svcID) ==
2369 FOD_BEST)
2370 handle_svc_request(svcID, SVC_START, -1, -1);
2372 continue;
2373 }
2375 if (!local && (nodeStatus == NODE_DOWN)) {
2377 /*
2378 * Take over services owned by the downed member if we
2379 * are the best candidate to run them.
2380 */
2381 consider_reapage(svcID, &svcStatus);
2382 if (node_should_start(myNodeID, membership, svcID) ==
2383 FOD_BEST)
2384 handle_svc_request(svcID, SVC_START, -1, -1);
2385 else
2386 check_rdomain_crash(svcID, &svcStatus);
2387 /*
2388 * TODO
2389 * Mark a service as 'stopped' if no members in its restricted
2390 * fail-over domain are running.
2391 */
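/*
 * The restricted-domain case described in this TODO appears to be what
 * check_rdomain_crash(), called just above, already handles, so the
 * TODO may simply be stale.
 */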
2392 }
2393 }
2394 }
2397 /**
2398 * Called to handle the transition of a cluster member from up->down or
2399 * down->up. This handles initializing services (in the local node-up case),
2400 * exiting due to loss of quorum (local node-down), and service fail-over
2401 * (remote node down).
2403 * @param nodeID ID of the member which has come up/gone down.
2404 * @param nodeStatus New state of the member in question.
2405 * @see eval_services
2406 */
2407 void
2408 node_event(int nodeID, int nodeStatus)
2409 {
2410 int local = 0;
2411 int partner;
2413 local = (nodeID == myNodeID);
2414 if (local) {
2415 if (nodeStatus == NODE_UP) {
2417 if (myNodeState == NODE_UP)
2418 return;
2420 myNodeState = NODE_UP;
2422 #ifdef OLD_CLU_ALIAS
2423 clu_alias(0);
2424 #endif
2426 clulog(LOG_DEBUG,
2427 "local member up, initializing services\n");
2429 /*
2430 * Initialize all services we own. We needed to wait
2431 * for a NODE_UP event as we need the locking
2432 * subsystem for this.
2433 */
2435 if (init_services() != SUCCESS) {
2436 clulog(LOG_ERR, "Cannot initialize services\n");
2437 svcmgr_exit(1, 0);
2438 }
2439 }
2441 if (nodeStatus == NODE_DOWN) {
2442 svcmgr_exit(0, 0);
2443 /* NOT REACHED */
2444 }
2445 } else {
2447 /*
2448 * Nothing to do for events from other nodes if we are not up.
2449 */
2451 if (myNodeState != NODE_UP)
2452 return;
2453 }
2455 #ifdef OLD_CLU_ALIAS
2456 if (myNodeID == memb_high_node(membership)) {
2457 clu_alias(1);
2458 } else {
2459 clu_alias(0);
2460 }
2461 #endif
2463 eval_services(local, nodeStatus);
2465 /* If we just came up, and our partner is up request a failback */
2466 if (local && (nodeStatus == NODE_UP)) {
2468 for (partner = 0; partner < MAX_NODES; partner++) {
2469 if (partner == myNodeID)
2470 continue;
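/*
 * request_failback() presumably asks the online partner to hand back
 * any services that now prefer us; on the receiving side this shows up
 * as an SVC_FAILBACK action, handled in dispatch_msg() via failback().
 */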
2472 if (memb_online(membership, partner)) {
2473 if (request_failback(partner) != SUCCESS) {
2474 clulog(LOG_ERR,
2475 "Unable to inform partner "
2476 "to start failback\n");
2484 /**
2485 * Run service status scripts on all services which (a) we are running and
2486 * (b) have check intervals set.
2487 *
2488 * @param elapsed Number of elapsed seconds since last time
2489 * check_services was run.
2490 */
2491 void
2492 check_services(int elapsed)
2493 {
2494 int svcID;
2495 char *svcName;
2496 ServiceBlock svcStatus;
2497 char *intervalStr;
2498 int interval;
2500 for (svcID = 0; svcID < MAX_SERVICES; svcID++) {
2502 if (serviceExists(svcID) != YES)
2503 continue;
2505 getSvcName(svcID, &svcName);
2507 /*
2508 * Check service interval first, since it doesn't
2509 * require a lock.
2510 */
2511 if (getSvcCheckInterval(svcID, &intervalStr) == SUCCESS)
2512 interval = atoi(intervalStr);
2513 else
2514 interval = 0;
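/*
 * Note: a missing or non-numeric check interval ends up as 0 here
 * (atoi() returns 0 in that case), which disables status checks for
 * this service entirely.
 */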
2516 if (!interval)
2517 continue;
2519 /*
2520 * Check service status
2521 */
2522 if (clu_svc_lock(svcID) == -1) {
2523 clulog(LOG_ERR, "Unable to obtain cluster lock: %s\n",
2524 strerror(errno));
2525 return;
2526 }
2528 if (getServiceStatus(svcID, &svcStatus) != SUCCESS) {
2529 clu_svc_unlock(svcID);
2530 clulog(LOG_ERR,
2531 "Failed getting status for service %s\n",
2532 svcName);
2533 continue;
2534 }
2535 clu_svc_unlock(svcID);
2537 if ((svcStatus.sb_owner != myNodeID)
2538 || (svcStatus.sb_state != SVC_STARTED))
2539 continue;
2541 ticks[svcID] += elapsed;
2543 clulog(LOG_DEBUG,
2544 "Check interval for service %s is %d, elapsed %d\n",
2545 svcName, interval, ticks[svcID]);
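/*
 * check_services() itself only runs about once every CHECK_INTERVAL
 * seconds (driven from the main loop), so a configured interval of,
 * say, 30 fires the status check on the first pass where the
 * accumulated ticks reach 30, i.e. roughly every 30 to 35 seconds
 * rather than exactly every 30.
 */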
2547 if (ticks[svcID] < interval) {
2548 clulog(LOG_DEBUG, "Too early to check service %s\n",
2549 svcName);
2550 continue;
2551 }
2553 ticks[svcID] = 0;
2554 handle_svc_request(svcID, SVC_CHECK, -1, -1);
2555 }
2556 }
2559 /**
2560 * Handle a QUORUM or QUORUM_GAINED message from the quorum daemon. This
2561 * updates our local membership view, handles whether or not we should
2562 * exit, and determines node transitions (thus calling node_event()).
2563 *
2564 * @param msg_quorum Cluster event from the quorum daemon.
2565 * @see node_event
2566 * @return 0
2567 */
2568 int
2569 handle_quorum_msg(cm_event_t *msg_quorum)
2570 {
2571 memb_mask_t node_delta, old_membership;
2572 int x;
2573 char *nodeName;
2574 int me = 0;
2576 memcpy(old_membership, membership, sizeof(memb_mask_t));
2577 memcpy(membership, cm_quorum_mask(msg_quorum), sizeof(memb_mask_t));
2579 lock_set_quorum_view(cm_quorum_view(msg_quorum));
2581 clulog(LOG_INFO, "Quorum Event: View #%d %s\n",
2582 (int)cm_quorum_view(msg_quorum),
2583 memb_mask_str(cm_quorum_mask(msg_quorum)));
2585 /*
2586 * Handle nodes lost. Do our local node event first.
2587 */
2588 memb_mask_lost(node_delta, old_membership, membership);
2590 me = memb_online(node_delta, myNodeID);
2591 if (me) {
2592 /* Should not happen */
2593 clulog(LOG_INFO, "State change: LOCAL OFFLINE\n");
2594 node_event(myNodeID, NODE_DOWN);
2595 /* NOTREACHED */
2596 }
2598 for (x=0; x<MAX_NODES; x++) {
2599 if (x == myNodeID)
2600 continue;
2601 /*
2602 * If a member loses its panic status and is not online, take
2603 * over its services. That is, someone has decided *for sure*
2604 * that said member is DOWN, so its state is no longer
2605 * unknown (e.g., the disk tiebreaker lost quorum...).
2606 */
2607 getNodeName(x, &nodeName);
2609 if (memb_online(mask_panic, x) &&
2610 !memb_online(cm_quorum_mask_panic(msg_quorum),x) &&
2611 !memb_online(cm_quorum_mask(msg_quorum),x)) {
2612 memb_mark_down(mask_panic, x);
2613 node_event(x, NODE_DOWN);
2614 clulog(LOG_INFO, "State change: %s DOWN\n",
2615 nodeName);
2616 continue;
2617 }
2619 if (!memb_online(node_delta, x))
2620 continue;
2622 if (memb_online(cm_quorum_mask_panic(msg_quorum), x)) {
2623 clulog(LOG_WARNING, "Member %s's state is uncertain: "
2624 "Some services may be unavailable!",
2625 nodeName);
2626 continue;
2627 }
2629 node_event(x, NODE_DOWN);
2630 clulog(LOG_INFO, "State change: %s DOWN\n",
2631 nodeName);
2632 }
2634 /*
2635 * Store our panic nodemask.
2636 */
2637 memcpy(mask_panic, cm_quorum_mask_panic(msg_quorum),
2638 sizeof(memb_mask_t));
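/*
 * mask_panic remembers which members the quorum daemon reported as
 * "uncertain" on the previous event; as checked above, such a member
 * is only treated as DOWN once it has left both the panic mask and the
 * regular membership mask.
 */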
2640 /*
2641 * Handle nodes gained. Do our local node event first.
2642 */
2643 me = memb_mask_gained(node_delta, old_membership, membership);
2644 if (me) {
2645 clulog(LOG_INFO, "State change: Local UP\n");
2646 node_event(myNodeID, NODE_UP);
2647 }
2649 for (x=0; x<MAX_NODES; x++) {
2650 if (!memb_online(node_delta, x))
2651 continue;
2653 if (x == myNodeID)
2654 continue;
2656 node_event(x, NODE_UP);
2657 getNodeName(x, &nodeName);
2658 clulog(LOG_INFO, "State change: %s UP\n",
2659 nodeName);
2660 }
2662 return 0;
2663 }
2666 /**
2667 * Read a message on a file descriptor (the one which is connected to
2668 * the quorum daemon) and process it accordingly.
2669 *
2670 * @param fd File descriptor connected to the quorum daemon.
2671 * @return FAIL - no message waiting/empty message,
2672 * SUCCESS - successfully handled message.
2673 * @see dispatch_msg
2674 */
2675 int
2676 quorum_msg(msg_handle_t fd)
2677 {
2678 cm_event_t *msg_quorum;
2680 msg_quorum = cm_ev_read(fd);
2682 if (!msg_quorum)
2683 return FAIL;
2685 switch (cm_ev_event(msg_quorum)) {
2686 case EV_QUORUM_LOST:
2687 clulog(LOG_CRIT,"Halting services due to loss of quorum\n");
2688 svcmgr_exit(1, 0);
2689 /* NOT REACHED */
2690 break;
2692 case EV_QUORUM:
2693 case EV_QUORUM_GAINED:
2694 handle_quorum_msg(msg_quorum);
2695 break;
2697 case EV_NO_QUORUM:
2698 /* idle(); */
2699 break;
2701 default:
2702 clulog(LOG_DEBUG, "unhandled message request %d\n",
2703 cm_ev_event(msg_quorum));
2704 break;
2705 }
2707 cm_ev_free(msg_quorum);
2708 return SUCCESS;
2709 }
2712 /**
2713 * Receive and process a message on a file descriptor and decide what to
2714 * do with it. This function doesn't handle messages from the quorum daemon.
2715 *
2716 * @param fd File descriptor with a waiting message.
2717 * @return FAIL - failed to receive/handle message, or invalid
2718 * data received. SUCCESS - handled message successfully.
2719 * @see quorum_msg
2720 */
2721 int
2722 dispatch_msg(msg_handle_t fd)
2723 {
2724 int ret;
2725 generic_msg_hdr msg_hdr;
2726 SmMessageSt msg_sm;
2728 /* Peek-a-boo */
2729 ret = msg_peek(fd, &msg_hdr, sizeof(msg_hdr));
2730 if (ret != sizeof (generic_msg_hdr)) {
2731 clulog(LOG_ERR, "error receiving message header\n");
2732 return FAIL;
2733 }
2735 /* Decode the header */
2736 swab_generic_msg_hdr(&msg_hdr);
2737 if ((msg_hdr.gh_magic != GENERIC_HDR_MAGIC)) {
2738 clulog(LOG_ERR, "Invalid magic: Wanted 0x%08x, got 0x%08x\n",
2739 GENERIC_HDR_MAGIC, msg_hdr.gh_magic);
2740 return FAIL;
2741 }
2743 clulog(LOG_DEBUG, "received message, fd %d\n", fd);
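/*
 * Only the generic header has been peeked at so far; for commands that
 * carry a body (SVC_ACTION_REQUEST below), the full SmMessageSt is read
 * afterwards with msg_receive_timeout() once gh_command is known.
 */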
2745 switch (msg_hdr.gh_command) {
2746 case SVC_CONFIG_UPDATE:
2747 clulog(LOG_INFO, "Rereading configuration...\n");
2749 rebuild_config_lockless();
2751 msg_svc_init(1);
2752 set_loglevel();
2753 notify_everybody();
2754 break;
2756 case SVC_LOCK:
2757 clulog(LOG_NOTICE, "Service states locked\n");
2758 services_locked = 1;
2759 break;
2761 case SVC_UNLOCK:
2762 clulog(LOG_NOTICE, "Service states unlocked\n");
2763 services_locked = 0;
2764 break;
2766 case SVC_QUERY_LOCK:
2767 msg_send_simple(fd, services_locked?SVC_LOCK:SVC_UNLOCK, 0, 0);
2768 break;
2770 case SVC_ACTION_REQUEST:
2772 ret = msg_receive_timeout(fd, &msg_sm, sizeof(msg_sm),
2773 MSG_TIMEOUT);
2774 if (ret != sizeof(msg_sm)) {
2775 clulog(LOG_ERR, "receiving message data from client "
2776 "error: %d\n", ret);
2777 return FAIL;
2778 }
2780 /* Decode SmMessageSt message */
2781 swab_SmMessageSt(&msg_sm);
2783 if (services_locked) {
2784 msg_sm.sm_data.d_ret = FAIL;
2785 /* Encode before responding... */
2786 swab_SmMessageSt(&msg_sm);
2788 if (msg_send(fd, &msg_sm, sizeof (SmMessageSt)) !=
2789 sizeof (SmMessageSt))
2790 clulog(LOG_ERR,
2791 "Error replying to action request.\n");
2793 break;
2794 }
2796 if (msg_sm.sm_data.d_action == SVC_FAILBACK) {
2797 failback(msg_sm.sm_data.d_svcOwner);
2798 break;
2799 }
2801 handle_svc_request(msg_sm.sm_data.d_svcID,
2802 msg_sm.sm_data.d_action,
2803 msg_sm.sm_data.d_svcOwner, fd);
2804 break;
2806 default:
2807 clulog(LOG_DEBUG, "unhandled message request %d\n",
2808 msg_hdr.gh_command);
2809 break;
2810 }
2811 return SUCCESS;
2812 }
2815 int
2816 main(int argc, char **argv)
2817 {
2818 struct timeval timeout, tv1, tv2;
2819 int elapsed_secs;
2820 int check_period = 0;
2821 msg_handle_t fd;
2822 int i;
2823 msg_handle_t listen_fd, quorum_fd;
2824 sigset_t set;
2825 fd_set rfds;
2826 extern char *optarg;
2827 int foreground = 0, debug = 0, opt, retries = 0;
2829 while ((opt = getopt(argc, argv, "fd")) != EOF) {
2830 switch (opt) {
2831 case 'd':
2832 debug = 1;
2833 break;
2834 case 'f':
2835 foreground = 1;
2836 default:
2837 break;
2838 }
2839 }
2841 if (!debug)
2842 (void) clu_set_loglevel(LOG_INFO);
2843 else
2844 (void) clu_set_loglevel(LOG_DEBUG);
2846 if (!foreground)
2847 daemon_init(argv[0]);
2848 else
2849 clu_log_console(1);
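/*
 * Usage sketch (based on the getopt string "fd" above): the daemon is
 * normally spawned by the quorum daemon, but for interactive debugging
 * one might run it by hand as, e.g.:
 *
 *      clusvcmgrd -f -d
 *
 * where -f keeps it in the foreground (console logging) and -d raises
 * the log level to LOG_DEBUG.
 */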
2851 /*
2852 * Generally, you do this when you know you have quorum.
2853 * However, the service manager simply doesn't get here without
2854 * quorum... (The quorum daemon spawns it when it achieves quorum)
2855 */
2856 shared_storage_init();
2857 switch(boot_config_init()) {
2858 case -1:
2859 clulog(LOG_CRIT, "Configuration invalid!\n");
2860 return -1;
2861 case 1:
2862 notify_everybody();
2863 break;
2864 case 0:
2865 default:
2866 break;
2867 }
2869 memset(membership,0,sizeof(memb_mask_t));
2871 set_facility();
2872 if (!debug)
2873 set_loglevel();
2875 clulog(LOG_DEBUG, "Service Manager starting\n");
2877 /*
2878 * daemon_init() blocks most signals, so we need to add the
2879 * ones the Service Manager is interested in.
2880 */
2881 sigemptyset(&set);
2882 sigaddset(&set, SIGINT);
2883 sigaddset(&set, SIGTERM);
2884 sigaddset(&set, SIGHUP);
2885 sigaddset(&set, SIGCHLD);
2886 sigprocmask(SIG_UNBLOCK, &set, NULL);
2887 (void) signal(SIGINT, (void (*)(int)) sigterm_handler);
2888 (void) signal(SIGTERM, (void (*)(int)) sigterm_handler);
2889 (void) signal(SIGHUP, (void (*)(int)) sighup_handler);
2890 (void) signal(SIGCHLD, (void (*)(int)) reap_zombies);
2892 /*
2893 * Retrieve our node id
2894 */
2895 myNodeID = memb_local_id();
2897 getNodeName(myNodeID, &myNodeName);
2898 myNodeName = strdup(myNodeName);
2899 myNodeState = NODE_DOWN;
2901 for (i = 0; i < MAX_SERVICES; i++) {
2902 ticks[i] = 0;
2903 svc_children[i].cs_pid = 0;
2904 svc_children[i].cs_rq = 0;
2905 }
2907 /*
2908 * Set up the message service
2909 */
2910 do {
2911 listen_fd = msg_listen(PROCID_CLUSVCMGRD);
2912 if (listen_fd >= 0)
2913 break;
2915 if (++retries < 30) {
2916 sleep(1); /* Arbitrary... */
2917 continue;
2918 }
2920 /* Could be that we lost and regained quorum really quickly */
2921 clulog(LOG_ERR, "Error setting up message listener: %s\n",
2922 strerror(errno));
2923 clulog(LOG_ERR, "%s process may already be running.\n",
2924 argv[0]);
2925 exit(1);
2926 } while (1);
2928 /*
2929 * Register for quorum events
2930 */
2931 do {
2932 quorum_fd = cm_ev_register(EC_QUORUM);
2933 if (quorum_fd >= 0)
2934 break;
2936 if (++retries < 10) {
2937 sleep(1);
2938 continue;
2939 }
2941 clulog(LOG_CRIT, "Couldn't register with the quorum daemon!");
2942 exit(1);
2943 } while(1);
2945 while (1) {
2947 gettimeofday(&tv1, NULL);
2948 #if 0
2949 /*
2950 * Reap any zombied service scripts, as we do not synchronously
2951 * wait on any of the service scripts. If the process was
2952 * handling a service action, clear out the indication that it
2953 * was running.
2954 */
2956 reap_zombies();
2957 #endif
2958 if (sighup_received) {
2959 sighup_received = 0;
2960 update_config();
2961 }
2963 if (sigterm_received)
2964 svcmgr_exit(0, 1);
2966 FD_ZERO(&rfds);
2967 FD_SET(listen_fd, &rfds);
2968 FD_SET(quorum_fd, &rfds);
2969 timeout.tv_sec = 2;
2970 timeout.tv_usec = 0;
2972 i = select(MAX(listen_fd,quorum_fd) + 1, &rfds, NULL, NULL,
2973 &timeout);
2975 /*
2976 * We used to not check the return from the select call.
2977 * However, this is necessary now because clusvcmgrd needs
2978 * to properly handle SIGHUP
2979 */
2980 if (i <= 0) {
2981 FD_ZERO(&rfds);
2982 if ((i == -1) && (errno != EINTR))
2983 clulog(LOG_WARNING, "select: %s\n",
2984 strerror(errno));
2985 }
2987 if (FD_ISSET(listen_fd, &rfds)) {
2988 fd = msg_accept_timeout(listen_fd, 1);
2989 /*
2990 * Process any waiting messages.
2991 */
2992 if (fd != -1) {
2993 dispatch_msg(fd);
2994 msg_close(fd);
2995 }
2996 }
2998 if (FD_ISSET(quorum_fd, &rfds)) {
2999 clulog(LOG_DEBUG, "Processing quorum event\n");
3000 if (quorum_msg(quorum_fd) == -1) {
3001 clulog(LOG_WARNING, "Invalid message from "
3002 "Quorum Daemon. Reconnecting\n");
3003 /* Failed to process it? Try reconnecting */
3004 cm_ev_unregister(quorum_fd);
3005 sleep(2);
3006 if (((quorum_fd =
3007 cm_ev_register(EC_QUORUM)) == -1) &&
3008 !sigterm_received) {
3010 clulog(LOG_EMERG, "Couldn't reconnect "
3011 "to the quorum daemon! "
3012 "REBOOTING");
3013 REBOOT(RB_AUTOBOOT);
3014 }
3015 }
3016 }
3018 gettimeofday(&tv2, NULL);
3019 elapsed_secs = tv2.tv_sec - tv1.tv_sec;
3021 /*
3022 * Check the status of running services and the cluster
3023 * configuration file (/etc/cluster.xml).
3024 */
3025 if ((check_period += elapsed_secs) >= CHECK_INTERVAL) {
3026 check_config_file();
3027 if (check_config_data() == 1) {
3028 rebuild_config_lockless();
3029 msg_svc_init(1);
3030 set_loglevel();
3031 notify_everybody();
3032 }
3033 check_services(check_period);
3034 check_period = 0;