Fix various bugs; see ChangeLog
[clumanager.git] / src / daemons / clusvcmgrd.c
blob fc094c889c8a27a5ddb37a30d06e86b049d93092
1 /*
2 Copyright Red Hat, Inc. 2002-2003
4 This program is free software; you can redistribute it and/or modify it
5 under the terms of the GNU General Public License as published by the
6 Free Software Foundation; either version 2, or (at your option) any
7 later version.
9 This program is distributed in the hope that it will be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; see the file COPYING. If not, write to the
16 Free Software Foundation, Inc., 675 Mass Ave, Cambridge,
17 MA 02139, USA.
19 /** @file
20 * Service Manager for RHCM. This is the 1.0.x service manager with
21 * extras to make it multi-node capable.
23 * Author: Brian Stevens (bstevens at redhat.com)
24 * Lon Hohberger (lhh at redhat.com)
28 /*static const char *version __attribute__ ((unused)) = "$Revision: 1.71 $";*/
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <unistd.h>
33 #include <getopt.h>
34 #include <signal.h>
35 #include <string.h>
36 #include <errno.h>
37 #include <sys/types.h>
38 #include <sys/param.h>
39 #include <sys/wait.h>
40 #include <sys/syslog.h>
41 #include <sys/time.h>
42 #include <linux/reboot.h>
43 #include <sys/reboot.h>
44 #include <sched.h>
45 #include <clusterdefs.h>
46 #include <xmlwrap.h>
47 #include <clu_lock.h>
48 #include <msgsvc.h>
49 #include <svcmgr.h>
50 #include <clulog.h>
51 #include <quorum.h>
52 #include <clushared.h>
53 #include <sharedstate.h>
54 #include <namespace.h>
55 #include <findproc.h>
57 #ifdef TESTING
58 #ifdef reboot
59 #undef reboot
60 #endif
61 #define reboot(arg) {\
62 clulog(LOG_EMERG, "reboot(%s) @ %s:%d\n", #arg, __FILE__, __LINE__); \
63 raise(SIGSTOP); \
65 #endif
67 #define svc_fail(x) \
68 do { \
69 clulog(LOG_DEBUG, "Service %d failed @ %s:%d\n",\
70 x, __FILE__, __LINE__); \
71 _svc_fail(x); \
72 } while(0)
74 #define HEARTBEAT_INTERVAL 60
75 #define CHECK_INTERVAL 5
76 #define MSG_TIMEOUT 10
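/* Flags passed to svc_start()/svc_stop() and the service request handlers below */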
78 #define SVCF_START_DISABLED 1
79 #define SVCF_PENDING 2
80 #define SVCF_RESTART 4
81 #define SVCF_CLEAR_FAILURES 8
82 #define SVCF_RESTARTFAILED 16
84 static int myNodeID;
85 static int myNodeState;
86 static int services_locked = 0;
87 #ifdef OLD_CLU_ALIAS
88 static int alias_owner = -1;
89 #endif
90 static char *myNodeName = NULL;
91 static int ticks[MAX_SERVICES];
92 static memb_mask_t membership, mask_panic;
93 static int sighup_received = 0, sigterm_received = 0;
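/*
 * Per-service child bookkeeping: cs_pid is the PID of the child process
 * currently acting on the service (0 if none), and cs_rq records which
 * request that child is handling.
 */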
95 struct child_sm {
96 pid_t cs_pid;
97 int cs_rq;
100 static struct child_sm svc_children[MAX_SERVICES];
103 * from clusvcmgrd_cfg.c:
105 int check_config_file(void);
106 int check_config_data(void);
107 int rebuild_config_lockless(void);
108 int rebuild_config(void);
109 int handle_config_update(memb_mask_t mask, int my_node_id);
110 int boot_config_init(void);
114 * Service action string table
116 static char *serviceActionStrings[] = {
117 SVC_NONE_STR,
118 SVC_ADD_STR,
119 SVC_REMOVE_STR,
120 SVC_START_STR,
121 SVC_STOP_STR,
122 SVC_CHECK_STR,
123 SVC_DISABLE_STR,
124 SVC_RELOCATE_STR,
125 SVC_STATUS_INQUIRY_STR,
126 SVC_FAILBACK_STR,
127 SVC_START_PENDING_STR,
128 SVC_START_RELOCATE_STR,
129 "reconfigure", /* XXX */
130 SVC_RESTART_STR
133 extern void daemon_init(char *);
135 static int svc_stop(int, int);
136 static int svc_stop_unclean(int);
137 static int _svc_fail(int svcID);
139 #ifdef OLD_CLU_ALIAS
140 static int clu_alias(int);
141 #endif
143 static int request_failback(int);
144 static int failback(int);
146 static int relocate_service(int svcID, int request, int target);
147 static void handle_svc_request(int, int, int, msg_handle_t);
149 int svc_report_failure(int svcID);
150 int setServiceStatus(ServiceBlock *svcblk);
151 int getServiceStatus(int svcNum, ServiceBlock *svcblk);
152 int removeService(int svcNum);
156 * Block the given signal.
158 * @param sig Signal to block.
159 * @return See man sigprocmask.
161 static int
162 block_signal(int sig)
164 sigset_t set;
166 sigemptyset(&set);
167 sigaddset(&set, sig);
169 return(sigprocmask(SIG_BLOCK, &set, NULL));
174 * Unblock the given signal.
176 * @param sig Signal to unblock.
177 * @return See man sigprocmask.
179 static int
180 unblock_signal(int sig)
182 sigset_t set;
184 sigemptyset(&set);
185 sigaddset(&set, sig);
187 return(sigprocmask(SIG_UNBLOCK, &set, NULL));
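/**
 * Read cluster%logfacility from the cluster configuration and, if it is
 * set, apply it to our logging via clu_set_facility().
 */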
191 static void
192 set_facility(void)
194 char *p;
195 if (CFG_Get((char *) "cluster%logfacility", NULL, &p) == CFG_OK) {
196 if (p)
197 clu_set_facility(p);
203 * Send a SVC_FAILBACK request to the given partner member.
205 * @param partner Partner we are sending request to.
206 * @return FAIL or SUCCESS
207 * @see failback
210 request_failback(int partner)
212 msg_handle_t fd_failback;
213 SmMessageSt msg_failback;
215 if (partner == myNodeID)
216 return SUCCESS;
219 * Fork here to avoid deadlock.
221 switch(fork()) {
222 case 0:
223 break;
224 case -1:
225 return FAIL;
226 default:
227 return SUCCESS;
230 msg_failback.sm_hdr.gh_magic = GENERIC_HDR_MAGIC;
231 msg_failback.sm_hdr.gh_command = SVC_ACTION_REQUEST;
232 msg_failback.sm_hdr.gh_length = sizeof (SmMessageSt);
233 msg_failback.sm_data.d_action = SVC_FAILBACK;
234 msg_failback.sm_data.d_svcOwner = myNodeID;
235 msg_failback.sm_data.d_ret = 0;
237 if ((fd_failback = msg_open(PROCID_CLUSVCMGRD, partner)) < 0) {
238 clulog(LOG_DEBUG, "Failed opening connection to svcmgrd\n");
239 exit(1);
242 /* Encode */
243 swab_SmMessageSt(&msg_failback);
245 if (msg_send(fd_failback, &msg_failback, sizeof (SmMessageSt)) !=
246 sizeof (SmMessageSt)) {
247 msg_close(fd_failback);
248 clulog(LOG_ERR, "Error sending failback request.\n");
249 exit(1);
251 msg_close(fd_failback);
252 exit(0);
257 * Handle SVC_FAILBACK from a given node. This shuts down services which
258 * should be running on 'target' instead of 'myNodeID'. Takes into account
259 * service failover domain and preferred node ordering. Services without
260 * a failover domain will never be sent to the requesting node.
262 * @param target Requestor which sent us SVC_FAILBACK
263 * @return SUCCESS
264 * @see request_failback
267 failback(int target)
269 int svcID;
271 for (svcID = 0; svcID < MAX_SERVICES; svcID++) {
272 if (serviceExists(svcID) != YES)
273 continue;
275 if (!svc_has_domain(svcID))
276 continue;
279 * If the service has a failover domain, and the requestor
280 * should run it and I shouldn't, then I will give the
281 * service back.
283 * This relies on handle_svc_request to determine the
284 * state of the service.
286 if (node_should_start(myNodeID, membership, svcID) <
287 node_should_start(target, membership, svcID))
288 handle_svc_request(svcID, SVC_RELOCATE, target, -1);
291 return SUCCESS;
296 * See if a child process operating on a specified service has exited.
298 * @param svcID Service ID whose child we are checking.
299 * @return 0 indicates that no child has exited. 1 indicates
300 * that the child for the service has, indeed, been
301 * cleaned up.
304 cleanup_child(int svcID)
306 /* Obvious check: is there even a child for this service? */
307 if (!svc_children[svcID].cs_pid)
308 return 1;
309 if (waitpid(svc_children[svcID].cs_pid, NULL, WNOHANG) != -1)
310 return 0;
311 if (errno != ECHILD)
312 return 0;
314 svc_children[svcID].cs_pid = 0;
315 return 1;
320 * Clean up children. This is our SIGCHLD handler.
322 void
323 reap_zombies(int __attribute__ ((unused)) sig)
325 int svcID;
326 int pid;
327 //int nchildren = 0;
328 int status;
330 while ((pid = waitpid(-1, &status, WNOHANG)) != 0) {
331 if (pid < 0) {
332 if (errno == EINTR)
333 continue;
334 break; /* No children */
337 /*clulog(LOG_DEBUG, "waitpid reaped %d\n", pid);*/
338 for (svcID = 0; svcID < MAX_SERVICES; svcID++) {
339 if (pid == svc_children[svcID].cs_pid) {
340 svc_children[svcID].cs_pid = 0;
341 //nchildren++;
345 //return (nchildren);
350 * Clean up services and exit.
352 * @param status Return value passed up to parent process.
353 * @param clean This is set to '1' when we're cleanly shutting down
354 * and we have quorum. Without quorum or during an
355 * unclean shutdown, this is 0.
356 * @return If it returns, that's BAD
358 static void
359 svcmgr_exit(int status, int clean)
361 int svcID, fd;
363 #ifdef OLD_CLU_ALIAS
364 clu_alias(0);
365 #endif
367 for (svcID = 0; svcID < MAX_SERVICES; svcID++) {
369 if (serviceExists(svcID) != YES)
370 continue;
372 /* Wait for child process acting on this service to exit */
373 while (!cleanup_child(svcID))
374 sleep(5);
376 if (clean) {
377 switch(svc_stop(svcID, 0)) {
378 case FAIL:
379 svc_fail(svcID);
380 svc_report_failure(svcID);
381 break;
382 case ABORT:
383 /* Lock failure during shutdown == switch to
384 unclean mode */
385 clulog(LOG_ERR, "Failed to acquire cluster lock "
386 "during shutdown\n");
388 clean = 0;
389 break;
390 case SUCCESS:
391 default:
392 continue;
396 /* Succeed || Die */
397 svc_stop_unclean(svcID);
401 * Tell the quorum daemon that we are leaving
403 clulog(LOG_DEBUG, "Sending message to quorumd that we are exiting\n");
405 if ((fd = msg_open(PROCID_CLUQUORUMD, myNodeID)) < 0) {
406 clulog(LOG_ERR, "msg_open failed to quorum daemon\n");
407 exit(status);
410 if (msg_send_simple(fd, QUORUM_EXIT, status, 0) == -1) {
411 clulog(LOG_ERR, "Failed sending exit message to cluquorumd\n");
412 msg_close(fd);
413 exit(status);
416 msg_close(fd);
417 clulog(LOG_INFO, "Exiting\n");
418 exit(status);
423 * NOTE: If someone kills the service manager during start, it's possible to
424 * have a service script still running the stop phase. This is OKAY!
425 * This is our SIGTERM handler.
427 * @see svcmgr_exit
429 static void
430 sigterm_handler(void)
432 block_signal(SIGHUP);
433 block_signal(SIGTERM);
434 sigterm_received = 1;
439 * Retrieve our log level from the cluster database and set it accordingly.
441 static void
442 set_loglevel(void)
444 int level;
446 if (getSvcMgrLogLevel(&level) == FAIL) {
447 clulog(LOG_ERR,
448 "Failed getting log level from config database\n");
449 return;
452 if (clu_set_loglevel(level) == -1) {
453 clulog(LOG_ERR, "Failed setting log level\n");
459 * Notify the local daemons that the on-disk configuration has changed, and
460 * so needs to be reread.
462 static void
463 notify_everybody(void)
466 * Notify local daemons of the configuration update...
468 killall("clumembd", SIGHUP);
469 killall("cluquorumd", SIGHUP);
470 killall("clulockd", SIGHUP);
471 killall("clurmtabd", SIGHUP);
476 * Handle an updated configuration. This is called after we receive a SIGHUP.
478 * @see sighup_handler
480 void
481 update_config(void)
483 int really_updated = 1;
485 block_signal(SIGHUP);
486 /* XXX check for return code?? */
487 /* We reload the msg service stuff inside handle_config_update */
488 really_updated = handle_config_update(membership, myNodeID);
490 if (really_updated == 0) {
491 set_loglevel();
492 notify_everybody();
496 * If we fail to update, the other service managers will reread
497 * the shared config in a few seconds anyway.
499 unblock_signal(SIGHUP);
504 * When we receive SIGHUP, we set the global flag; update_config is
505 * called shortly afterward.
507 * @see update_config
509 void
510 sighup_handler(int __attribute__ ((unused)) sig)
512 sighup_received = 1;
517 * Run the service script for a given service. The service scripts handle
518 * the real meat of starting/stopping services.
520 * @param action The action to take (ie, start/stop/status)
521 * @param svcID The service ID we intend to take 'action' on.
522 * @param block Set to 0 if the service script should run in the
523 * background, 1 if we should wait for it to complete
524 * before continuing.
525 * @param ret The return code of the service script.
526 * @return SUCCESS or FAIL.
528 static int
529 exec_service_script(char *action, int svcID, int block, int *ret)
531 int pid;
532 char svcIDstr[8];
533 char *svcName;
534 int local_ret = 0;
535 sigset_t set;
536 struct sched_param param;
538 getSvcName(svcID, &svcName);
540 clulog(LOG_DEBUG, "Exec of script %s, action %s, service %s\n",
541 SVC_ACTION_SCRIPT, action, svcName);
543 pid = fork();
544 if (pid < 0) {
545 clulog(LOG_ERR, "fork failed: %s", strerror(errno));
546 return FAIL;
548 if (pid) {
550 if (ret)
551 *ret = 0;
553 if (block) {
554 do {
555 pid = waitpid(pid, &local_ret, 0);
556 if (pid < 0) {
557 if (errno == EINTR)
558 continue;
559 clulog(LOG_DEBUG,
560 "waitpid: %s",
561 strerror(errno));
562 /* Fake it. */
563 local_ret = 0;
565 } while (0);
567 clulog(LOG_DEBUG,
568 "Exec of script for service %s returned %d\n",
569 svcName, local_ret);
570 if (ret)
571 *ret = local_ret;
573 return SUCCESS;
577 * We need to set sched_priority back to normal in case clusvcmgrd
578 * is running at a different priority because cluquorumd%rtp is set.
580 if (sched_getscheduler(0) != SCHED_OTHER) {
581 memset(&param,0,sizeof(param));
582 param.sched_priority = 0;
583 if (sched_setscheduler(0, SCHED_OTHER, (void *)&param) != 0)
584 clulog(LOG_WARNING, "Setting child to normal priority "
585 "failed: %s\n", strerror(errno));
586 else
587 clulog(LOG_DEBUG, "Using normal priority\n");
590 /* lhh - Unblock signals so the user script doesn't break */
591 sigfillset(&set);
592 if (sigprocmask(SIG_UNBLOCK, &set, NULL) != 0) {
593 clulog(LOG_WARNING, "Failed to unblock signals: %s\n",
594 strerror(errno));
597 snprintf(svcIDstr, sizeof (svcIDstr), "%d", svcID);
598 local_ret =
599 execl(SVC_ACTION_SCRIPT, SVC_ACTION_SCRIPT, action, svcIDstr, NULL);
601 clulog(LOG_DEBUG, "Exec failed of %s, action %s, service %s, err %s\n",
602 SVC_ACTION_SCRIPT, action, svcName, strerror(errno));
604 exit(local_ret);
608 #ifdef OLD_CLU_ALIAS
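/*
 * Start (req != 0) or stop (req == 0) the cluster alias by running
 * CLU_ALIAS_SCRIPT; alias_owner tracks whether we currently own it.
 */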
609 static int
610 clu_alias(int req)
612 int pid;
613 int local_ret;
615 if (req) {
616 if (alias_owner == myNodeID)
617 return SUCCESS;
618 alias_owner = myNodeID;
619 clulog(LOG_DEBUG, "Start cluster alias request\n");
620 } else {
621 if (alias_owner != myNodeID)
622 return SUCCESS;
623 alias_owner = -1;
624 clulog(LOG_DEBUG, "Stop cluster alias request\n");
627 pid = fork();
628 if (pid < 0) {
629 clulog(LOG_ERR, "fork failed: %s", strerror(errno));
630 return FAIL;
632 if (pid) {
633 do {
634 pid = waitpid(pid, &local_ret, 0);
635 if ((pid < 0) && (errno == EINTR))
636 continue;
637 } while (0);
639 clulog(LOG_DEBUG, "Exec of alias script returned %d\n",
640 local_ret);
641 return local_ret;
644 block_signal(SIGTERM);
645 block_signal(SIGHUP);
647 if (req)
648 local_ret =
649 execl(CLU_ALIAS_SCRIPT, CLU_ALIAS_SCRIPT, "start", NULL);
650 else
651 local_ret =
652 execl(CLU_ALIAS_SCRIPT, CLU_ALIAS_SCRIPT, "stop", NULL);
654 clulog(LOG_DEBUG, "Exec failed of %s, err %s\n", CLU_ALIAS_SCRIPT,
655 strerror(errno));
657 exit(local_ret);
659 #endif
663 * Initialize an on-disk service block.
665 * @param svcID Service ID whose block we need to update.
666 * @return FAIL or SUCCESS.
669 svc_add(int svcID)
671 ServiceBlock svcStatus;
673 clulog(LOG_DEBUG, "Initializing service #%d\n", svcID);
676 * Make sure the service does not exist
679 if (clu_svc_lock(svcID) == -1) {
680 clulog(LOG_ERR, "Unable to obtain cluster lock: %s\n",
681 strerror(errno));
682 return FAIL;
685 if (getServiceStatus(svcID, &svcStatus) == SUCCESS) {
686 clulog(LOG_ERR,
687 "Service #%d already exists!\n",
688 svcID);
689 clu_svc_unlock(svcID);
690 return FAIL;
693 svcStatus.sb_id = svcID;
694 svcStatus.sb_owner = NODE_ID_NONE;
695 svcStatus.sb_last_owner = NODE_ID_NONE;
696 svcStatus.sb_state = SVC_DISABLED;
697 svcStatus.sb_transition = (uint64_t)time(NULL);
698 svcStatus.sb_restarts = 0;
700 if (setServiceStatus(&svcStatus) != SUCCESS) {
701 (void) removeService(svcID);
702 clu_svc_unlock(svcID);
703 return FAIL;
706 clu_svc_unlock(svcID);
707 return SUCCESS;
712 * Set an on-disk service block's state to UNINITIALIZED.
714 * @param svcID Service ID whose block we need to update.
715 * @return FAIL or SUCCESS.
718 svc_remove(int svcID)
720 clulog(LOG_DEBUG, "Removing service #%d from database\n", svcID);
722 if (clu_svc_lock(svcID) == -1) {
723 clulog(LOG_ERR, "Unable to obtain cluster lock: %s\n",
724 strerror(errno));
725 return FAIL;
728 if (removeService(svcID) != 0) {
729 clulog(LOG_ERR, "Failed removing service %d from database\n",
730 svcID);
731 clu_svc_unlock(svcID);
732 return FAIL;
735 clu_svc_unlock(svcID);
736 return SUCCESS;
741 * Advise service manager as to whether or not to start a service, given
742 * that we already know it's legal to run the service.
744 * @param svcStatus Current service status.
745 * @param svcName Service name
746 * @param flags Specify whether or not it's legal to start a
747 * disabled service, etc.
748 * @return 0 = DO NOT start service, return FAIL
749 * 1 = START service - return whatever it returns.
750 * 2 = DO NOT start service, return SUCCESS
753 svc_advise_start(ServiceBlock *svcStatus, char *svcName, int flags)
755 char *nodeName;
757 switch(svcStatus->sb_state) {
758 case SVC_FAILED:
759 clulog(LOG_ERR, "Service %s has failed on all applicable "
760 "members; can not start.\n", svcName);
761 return 0;
763 case SVC_STARTED:
764 case SVC_STOPPING:
765 getNodeName(svcStatus->sb_owner, &nodeName);
766 if ((svcStatus->sb_owner == myNodeID) ||
767 (memb_online(membership, svcStatus->sb_owner)==1) ||
768 (memb_online(mask_panic, svcStatus->sb_owner)==1)) {
770 * Service is running and the owner is online!
772 clulog(LOG_DEBUG,
773 "Service is running on member %s.\n",
774 nodeName);
775 return 2;
779 * Service is running but owner is down -> FAILOVER
781 clulog(LOG_NOTICE,
782 "Taking over service %s from down member %s\n",
783 svcName, nodeName);
784 return 1;
786 case SVC_PENDING:
788 * Starting failed service...
790 if (flags & SVCF_PENDING) {
791 clulog(LOG_NOTICE, "Starting failed service %s\n",
792 svcName);
793 svcStatus->sb_state = SVC_STOPPED;
794 /* Start! */
795 return 1;
798 /* Don't start, but return success. */
799 clulog(LOG_DEBUG,
800 "Not starting %s: pending/transitional state\n",
801 svcName);
802 return 2;
804 case SVC_STOPPED:
805 clulog(LOG_NOTICE, "Starting stopped service %s\n", svcName);
806 return 1;
808 case SVC_DISABLED:
809 case SVC_UNINITIALIZED:
810 if (flags & SVCF_START_DISABLED) {
811 clulog(LOG_NOTICE, "Starting disabled service %s\n",
812 svcName);
813 return 1;
816 clulog(LOG_DEBUG, "Not starting disabled service %s\n",
817 svcName);
818 return 0;
820 default:
821 clulog(LOG_ERR,
822 "Cannot start service %s: Invalid State %d\n",
823 svcName, svcStatus->sb_state);
824 return 0;
830 * Start a cluster service.
832 * @param svcID Service ID to start.
833 * @param flags Service-operation specific flags to take into account.
834 * @see svc_advise_start
835 * @return FAIL, SUCCESS
837 static int
838 svc_start(int svcID, int flags)
840 char *svcName;
841 ServiceBlock svcStatus;
842 int ret;
844 getSvcName(svcID, &svcName);
845 clulog(LOG_DEBUG, "Handling start request for service %s\n", svcName);
847 if (clu_svc_lock(svcID) == -1) {
848 clulog(LOG_ERR, "Unable to obtain cluster lock: %s\n",
849 strerror(errno));
850 return FAIL;
853 if (getServiceStatus(svcID, &svcStatus) != SUCCESS) {
854 clu_svc_unlock(svcID);
855 clulog(LOG_ERR, "Failed getting status for service %s\n",
856 svcName);
857 return FAIL;
860 /* LOCK HELD */
861 switch (svc_advise_start(&svcStatus, svcName, flags)) {
862 case 0: /* Don't start service, return FAIL */
863 clu_svc_unlock(svcID);
864 return FAIL;
865 case 1: /* Start service. */
866 break;
867 case 2: /* Don't start service, return SUCCESS */
868 clu_svc_unlock(svcID);
869 return SUCCESS;
871 default:
872 break;
875 /* LOCK HELD if we get here */
876 #if 0
877 if (flags & SVCF_CLEAR_FAILURES)
878 memset(svcStatus.sb_failed_mask, 0, sizeof(memb_mask_t));
879 #endif
881 svcStatus.sb_owner = myNodeID;
882 svcStatus.sb_state = SVC_STARTED;
883 svcStatus.sb_transition = (uint64_t)time(NULL);
884 svcStatus.sb_checks = (uint16_t)0;
886 if (flags & (SVCF_START_DISABLED|SVCF_PENDING))
887 svcStatus.sb_false_starts = (uint16_t)0;
889 if (flags & SVCF_RESTARTFAILED)
890 svcStatus.sb_restarts++;
891 else
892 svcStatus.sb_restarts = 0;
894 if (setServiceStatus(&svcStatus) != SUCCESS) {
895 clulog(LOG_ERR, "Failed changing service status\n");
896 clu_svc_unlock(svcID);
897 return FAIL;
900 clu_svc_unlock(svcID);
902 if ((exec_service_script(SVC_START_STR, svcID, 1, &ret) != SUCCESS) ||
903 (ret)) {
904 return FAIL;
907 return SUCCESS;
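/**
 * Update a service's on-disk status block under the cluster lock. If
 * last_owner_flip is set, the current owner is recorded in sb_last_owner
 * and sb_owner is cleared.
 *
 * @param svcName Service name (used for error messages).
 * @param svcID Service ID whose status block we update.
 * @param state New state to record.
 * @param last_owner_flip Nonzero to rotate sb_owner into sb_last_owner.
 * @return SUCCESS, FAIL, or ABORT (cluster lock unavailable).
 */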
911 static int
912 flip_state(char *svcName, int svcID, int state, int last_owner_flip)
914 ServiceBlock svcStatus;
916 if (clu_svc_lock(svcID) == -1) {
917 clulog(LOG_ERR, "Unable to obtain cluster lock: %s\n",
918 strerror(errno));
919 return ABORT;
922 if (getServiceStatus(svcID, &svcStatus) != SUCCESS) {
923 clu_svc_unlock(svcID);
924 clulog(LOG_ERR, "Failed getting status for service %s\n",
925 svcName);
926 return FAIL;
929 if (last_owner_flip) {
930 svcStatus.sb_last_owner = svcStatus.sb_owner;
931 svcStatus.sb_owner = NODE_ID_NONE;
933 svcStatus.sb_state = state;
934 svcStatus.sb_transition = (uint64_t)time(NULL);
935 if (setServiceStatus(&svcStatus) != SUCCESS) {
936 clu_svc_unlock(svcID);
937 clulog(LOG_ERR, "Failed changing service status\n");
938 return FAIL;
940 clu_svc_unlock(svcID);
941 return SUCCESS;
947 * Stop a cluster service.
949 * @param svcID Service ID to stop.
950 * @param flags Service-operation specific flags to take into account.
951 * @see svc_advise_start
952 * @return FAIL, SUCCESS
954 static int
955 svc_stop(int svcID, int flags)
957 ServiceBlock svcStatus;
958 char *svcName;
959 int ret;
961 getSvcName(svcID, &svcName);
962 clulog(LOG_DEBUG, "Handling stop request for service %s\n", svcName);
964 if (clu_svc_lock(svcID) == -1) {
965 clulog(LOG_ERR, "Unable to obtain cluster lock: %s\n",
966 strerror(errno));
967 return ABORT;
970 if (getServiceStatus(svcID, &svcStatus) != SUCCESS) {
971 clu_svc_unlock(svcID);
972 clulog(LOG_ERR, "Failed getting status for service %s\n",
973 svcName);
974 return FAIL;
977 if (((svcStatus.sb_state != SVC_STARTED) ||
978 (svcStatus.sb_owner != myNodeID))
979 && (svcStatus.sb_state != SVC_PENDING)) {
980 clu_svc_unlock(svcID);
981 clulog(LOG_DEBUG, "Unable to stop service %s in %s state\n",
982 svcName, serviceStateStrings[svcStatus.sb_state]);
983 return SUCCESS;
986 svcStatus.sb_state = SVC_STOPPING;
987 svcStatus.sb_transition = (uint64_t)time(NULL);
988 if (setServiceStatus(&svcStatus) != SUCCESS) {
989 clu_svc_unlock(svcID);
990 clulog(LOG_ERR, "Failed changing service status\n");
991 return FAIL;
993 clu_svc_unlock(svcID);
995 if ((exec_service_script(SVC_STOP_STR, svcID, 1, &ret) != SUCCESS) ||
996 (ret)) {
997 return FAIL;
1000 if (flags & SVCF_PENDING)
1001 ret = SVC_PENDING;
1002 else
1003 ret = SVC_STOPPED;
1005 flip_state(svcName, svcID, ret, 1);
1007 return SUCCESS;
1012 * Stop a cluster service - without updating the on-disk-block.
1014 * @param svcID Service ID to stop.
1015 * @return FAIL, SUCCESS
1017 static int
1018 svc_stop_unclean(int svcID)
1020 int ret;
1021 char *svcName;
1024 * Infanticide.
1026 if (svc_children[svcID].cs_pid) {
1027 kill(svc_children[svcID].cs_pid, SIGKILL);
1029 do {
1030 if ((waitpid(svc_children[svcID].cs_pid, NULL, 0)==-1)
1031 && (errno == EINTR))
1032 continue;
1033 } while (0);
1036 getSvcName(svcID, &svcName);
1037 clulog(LOG_WARNING, "Forcing stop of service %s\n", svcName);
1039 if ((exec_service_script(SVC_STOP_STR, svcID, 1, &ret) != SUCCESS) ||
1040 (ret)) {
1041 clulog(LOG_EMERG,
1042 "Failed to stop service %s uncleanly - REBOOTING\n",
1043 svcName);
1044 sleep(1);
1045 REBOOT(RB_AUTOBOOT);
1047 return SUCCESS;
1052 * Disable a cluster service. Services in the disabled state are never
1053 * automatically started by the service manager - one must send a SVC_START
1054 * message.
1056 * @param svcID Service ID to stop.
1057 * @return FAIL, SUCCESS
1059 static int
1060 svc_disable(int svcID)
1062 ServiceBlock svcStatus;
1063 char *svcName;
1064 int ret;
1066 if (clu_svc_lock(svcID) == -1) {
1067 clulog(LOG_ERR, "Unable to obtain cluster lock: %s\n",
1068 strerror(errno));
1069 return FAIL;
1072 getSvcName(svcID, &svcName);
1073 clulog(LOG_DEBUG, "Handling disable request for service %s\n", svcName);
1075 if (getServiceStatus(svcID, &svcStatus) != SUCCESS) {
1076 clu_svc_unlock(svcID);
1077 clulog(LOG_ERR, "Failed getting status for service %s\n",
1078 svcName);
1079 return FAIL;
1082 if (svcStatus.sb_state == SVC_DISABLED) {
1083 clu_svc_unlock(svcID);
1084 clulog(LOG_DEBUG, "Service %s already disabled\n", svcName);
1085 return SUCCESS;
1088 if (((svcStatus.sb_state == SVC_STOPPING) &&
1089 (svcStatus.sb_owner != myNodeID)) &&
1090 (memb_online(membership, svcStatus.sb_owner)==1)) {
1091 clu_svc_unlock(svcID);
1092 clulog(LOG_WARNING,
1093 "Service %s is in stop-transition on node %d"
1094 ", cannot disable\n", svcName, svcStatus.sb_owner);
1095 return SUCCESS;
1098 if (((svcStatus.sb_state == SVC_STARTED) &&
1099 (svcStatus.sb_owner != myNodeID))
1100 || ((svcStatus.sb_state != SVC_STARTED)
1101 && (svcStatus.sb_state != SVC_STOPPING)
1102 && (svcStatus.sb_state != SVC_STOPPED)
1103 && (svcStatus.sb_state != SVC_PENDING)
1104 && (svcStatus.sb_state != SVC_FAILED))) {
1105 clu_svc_unlock(svcID);
1106 clulog(LOG_DEBUG, "Unable to disable service %s in %s state\n",
1107 svcName, serviceStateStrings[svcStatus.sb_state]);
1108 return FAIL;
1111 svcStatus.sb_state = SVC_STOPPING;
1112 svcStatus.sb_transition = (uint64_t)time(NULL);
1113 if (setServiceStatus(&svcStatus) != SUCCESS) {
1114 clu_svc_unlock(svcID);
1115 clulog(LOG_ERR, "Failed changing service status\n");
1116 return FAIL;
1118 clu_svc_unlock(svcID);
1120 if ((exec_service_script(SVC_STOP_STR, svcID, 1, &ret) != SUCCESS) ||
1121 (ret)) {
1122 return FAIL;
1125 flip_state(svcName, svcID, SVC_DISABLED, 1);
1127 return SUCCESS;
1132 * Mark a cluster service as failed. User intervention required.
1134 * @param svcID Service ID to stop.
1135 * @return FAIL, SUCCESS
1137 static int
1138 _svc_fail(int svcID)
1140 ServiceBlock svcStatus;
1141 char *svcName;
1143 if (clu_svc_lock(svcID) == -1) {
1144 clulog(LOG_ERR, "Unable to obtain cluster lock: %s\n",
1145 strerror(errno));
1146 return FAIL;
1149 getSvcName(svcID, &svcName);
1150 clulog(LOG_DEBUG, "Handling failure request for service %s\n", svcName);
1152 if (getServiceStatus(svcID, &svcStatus) != SUCCESS) {
1153 clu_svc_unlock(svcID);
1154 clulog(LOG_ERR, "Failed getting status for service %s\n",
1155 svcName);
1156 return FAIL;
1159 if ((svcStatus.sb_state == SVC_STARTED) &&
1160 (svcStatus.sb_owner != myNodeID)) {
1161 clu_svc_unlock(svcID);
1162 clulog(LOG_DEBUG, "Unable to mark service %s as failed in %s state\n",
1163 svcName, serviceStateStrings[svcStatus.sb_state]);
1164 return FAIL;
1168 * Leave a bread crumb so we can debug the problem with the service!
1170 if (svcStatus.sb_owner != NODE_ID_NONE) {
1171 svcStatus.sb_last_owner = svcStatus.sb_owner;
1172 svcStatus.sb_owner = NODE_ID_NONE;
1174 svcStatus.sb_state = SVC_FAILED;
1175 svcStatus.sb_transition = (uint64_t)time(NULL);
1176 svcStatus.sb_restarts = 0;
1177 if (setServiceStatus(&svcStatus) != SUCCESS) {
1178 clu_svc_unlock(svcID);
1179 clulog(LOG_ERR, "Failed changing service status\n");
1180 return FAIL;
1182 clu_svc_unlock(svcID);
1184 return SUCCESS;
1189 * Check the status of a given service. This execs the service script
1190 * with the argument 'status', and evaluates the return code.
1192 * @param svcID Service ID to check.
1193 * @return FAIL or SUCCESS.
1195 static int
1196 svc_check(int svcID)
1198 ServiceBlock svcStatus;
1199 char *svcName, *maxrestarts, *maxfs;
1200 int script_ret, ret;
1202 getSvcName(svcID, &svcName);
1203 clulog(LOG_DEBUG, "Handling check request for service %s\n", svcName);
1205 if (clu_svc_lock(svcID) == -1) {
1206 clulog(LOG_ERR, "Unable to obtain cluster lock: %s\n",
1207 strerror(errno));
1208 return FAIL;
1211 if (getServiceStatus(svcID, &svcStatus) != SUCCESS) {
1212 clu_svc_unlock(svcID);
1213 clulog(LOG_ERR, "Failed getting status for service %s\n",
1214 svcName);
1215 return FAIL;
1218 if ((svcStatus.sb_state != SVC_STARTED) ||
1219 (svcStatus.sb_owner != myNodeID)) {
1220 clu_svc_unlock(svcID);
1221 clulog(LOG_DEBUG, "Unable to check service %s in %s state\n",
1222 svcName, serviceStateStrings[svcStatus.sb_state]);
1223 return FAIL;
1225 clu_svc_unlock(svcID);
1227 if ((exec_service_script(SVC_CHECK_STR, svcID, 1, &ret) != SUCCESS) ||
1228 (ret))
1229 script_ret = FAIL;
1230 else
1231 script_ret = SUCCESS;
1233 clu_svc_lock(svcID);
1234 if (getServiceStatus(svcID, &svcStatus) != SUCCESS) {
1235 clu_svc_unlock(svcID);
1236 clulog(LOG_ERR, "Failed getting status for service %s\n",
1237 svcName);
1238 return FAIL;
1241 if (script_ret == FAIL) {
1242 ret = FAIL;
1244 if (svcStatus.sb_checks == 0 &&
1245 (getSvcMaxFalseStarts(svcID, &maxfs) == SUCCESS) &&
1246 atoi(maxfs) > 0) {
1248 /* If we've exceeded false-start count, relocate */
1249 svcStatus.sb_false_starts++;
1250 clulog(LOG_WARNING,
1251 "Service %s false-start detected (%d/%d)\n",
1252 svcName, svcStatus.sb_false_starts, atoi(maxfs));
1254 if (svcStatus.sb_false_starts > atoi(maxfs)) {
1255 clulog(LOG_ERR, "Max false starts for service %s"
1256 " exceeded. Relocating\n", svcName);
1257 ret = ABORT;
1260 /* Update on-disk with new false start info */
1261 setServiceStatus(&svcStatus);
1264 if (getSvcMaxRestarts(svcID, &maxrestarts) == SUCCESS) {
1265 if (atoi(maxrestarts) > 0) {
1266 /* We're about to restart. If we would exceed
1267 our restart count, relocate. */
1268 if (svcStatus.sb_restarts >=
1269 atoi(maxrestarts)) {
1270 clulog(LOG_ERR, "Max restarts for "
1271 "service %s exceeded. "
1272 "Relocating\n", svcName);
1273 ret = ABORT;
1275 } else if (atoi(maxrestarts) < 0) {
1276 clulog(LOG_ERR, "Service %s failed. "
1277 "Relocating\n", svcName);
1278 ret = ABORT;
1281 } else { /* SUCCESS */
1282 ret = SUCCESS;
1283 if (!svcStatus.sb_checks) {
1284 svcStatus.sb_checks = 1;
1285 svcStatus.sb_false_starts = 0;
1286 setServiceStatus(&svcStatus);
1290 clu_svc_unlock(svcID);
1292 return ret;
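/**
 * Bring all services to a known state at member startup. Any service we
 * owned when we last went down, or that was left PENDING with no owner,
 * is marked STOPPED on disk; the stop script is then run for every
 * service to clean up state from an ungraceful shutdown.
 */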
1296 static int
1297 init_services(void)
1299 int svcID;
1300 ServiceBlock svcStatus;
1301 char *svcName;
1303 clulog(LOG_INFO, "Initializing services\n");
1305 for (svcID = 0; svcID < MAX_SERVICES; svcID++) {
1307 /* This takes a long time... Abort quickly if necessary */
1308 if (sigterm_received)
1309 svcmgr_exit(0, 1);
1311 if (serviceExists(svcID) != YES)
1312 continue;
1314 getSvcName(svcID, &svcName);
1317 * If service is not on the shared service information disk,
1318 * or it is running and owned by this node, reinitialize it.
1321 if (clu_svc_lock(svcID) == 0) {
1322 if ((getServiceStatus(svcID, &svcStatus) != SUCCESS) ||
1323 ((svcStatus.sb_owner == myNodeID) &&
1324 ((svcStatus.sb_state == SVC_STARTED) ||
1325 (svcStatus.sb_state == SVC_STOPPING))) ||
1326 ((svcStatus.sb_owner == NODE_ID_NONE) &&
1327 (svcStatus.sb_state == SVC_PENDING))) {
1328 svcStatus.sb_id = svcID;
1329 svcStatus.sb_last_owner = svcStatus.sb_owner;
1330 svcStatus.sb_owner = NODE_ID_NONE;
1331 svcStatus.sb_state = SVC_STOPPED;
1332 svcStatus.sb_transition = (uint64_t)time(NULL);
1333 svcStatus.sb_restarts = 0;
1334 if (setServiceStatus(&svcStatus) != SUCCESS) {
1335 clulog(LOG_ERR, "Failed setting "
1336 "service status for %s\n",
1337 svcName);
1340 clu_svc_unlock(svcID);
1341 } else {
1342 clulog(LOG_WARNING,
1343 "Unable to obtain lock for service %s: %s\n",
1344 svcName,
1345 strerror(errno));
1349 * We stop all services to clean up any state in the case
1350 * that this system came down without gracefully stopping
1351 * services.
1353 if (exec_service_script(SVC_STOP_STR, svcID, 1, NULL) !=
1354 SUCCESS) {
1355 clulog(LOG_ALERT,
1356 "Failed stopping service %s during init\n",
1357 svcName);
1358 continue;
1363 return SUCCESS;
1368 * Send a message to the target node to start the service.
1370 static int
1371 relocate_service(int svcID, int request, int target)
1373 SmMessageSt msg_relo;
1374 int fd_relo, msg_ret;
1376 /* Build the message header */
1377 msg_relo.sm_hdr.gh_magic = GENERIC_HDR_MAGIC;
1378 msg_relo.sm_hdr.gh_command = SVC_ACTION_REQUEST;
1379 msg_relo.sm_hdr.gh_length = sizeof (SmMessageSt);
1380 msg_relo.sm_data.d_action = request;
1381 msg_relo.sm_data.d_svcID = svcID;
1382 msg_relo.sm_data.d_ret = 0;
1384 /* Open a connection to the other node */
1386 if ((fd_relo = msg_open(PROCID_CLUSVCMGRD, target)) < 0) {
1387 clulog(LOG_ERR, "Failed opening connection to member #%d\n",
1388 target);
1389 return FAIL;
1392 /* Encode */
1393 swab_SmMessageSt(&msg_relo);
1395 /* Send relocate message to the other node */
1396 if (msg_send(fd_relo, &msg_relo, sizeof (SmMessageSt)) !=
1397 sizeof (SmMessageSt)) {
1398 clulog(LOG_ERR,
1399 "Error sending relocate request to member #%d\n",
1400 target);
1401 msg_close(fd_relo);
1402 return FAIL;
1405 clulog(LOG_DEBUG, "Sent relocate request.\n");
1407 /* Check the response */
1408 msg_ret = msg_receive(fd_relo, &msg_relo, sizeof (SmMessageSt));
1410 if (msg_ret != sizeof (SmMessageSt)) {
1412 * In this case, we don't restart the service, because the
1413 * service state is actually unknown to us at this time.
1415 clulog(LOG_ERR, "Mangled reply from member #%d during service "
1416 "relocate\n", target);
1417 msg_close(fd_relo);
1418 return SUCCESS; /* XXX really UNKNOWN */
1421 /* Got a valid response from other node. */
1422 msg_close(fd_relo);
1424 /* Decode */
1425 swab_SmMessageSt(&msg_relo);
1427 return msg_relo.sm_data.d_ret;
1432 * Advise whether or not we should drop a particular request for a given
1433 * service.
1435 * @param svcID Service ID in question.
1436 * @param req Particular request in question.
1437 * @param svcStatus Current service status block.
1438 * @return 1 for TRUE (drop service request), 0 for FALSE (do not
1439 * drop given request)
1442 svc_advise_drop_request(int svcID, int req, ServiceBlock * svcStatus)
1445 * Drop the request if the service is in the PENDING state (ie, it
1446 * failed on one node) and it is neither a START_PENDING nor a DISABLE.
1448 if ((svcStatus->sb_state == SVC_PENDING) &&
1449 ((req != SVC_START_PENDING) && (req != SVC_DISABLE))) {
1450 clulog(LOG_DEBUG,
1451 "Dropping op %d for svc%d: Service Pending Start\n",
1452 req, svcID);
1453 return 1;
1457 * Drop the request if it's an SVC_CHECK and the service isn't started.
1459 if ((req == SVC_CHECK) &&
1460 ((svcStatus->sb_state != SVC_STARTED) ||
1461 (svcStatus->sb_owner != myNodeID))) {
1462 clulog(LOG_DEBUG, "Dropping SVC_CHECK for svc%d: Service "
1463 "not running locally\n", svcID);
1464 return 1;
1468 * Drop the request if it's an SVC_CHECK and we're already doing
1469 * something to that service so that other requests may continue.
1471 if ((req == SVC_CHECK) && svc_children[svcID].cs_pid) {
1472 clulog(LOG_DEBUG,
1473 "Dropping SVC_CHECK for svc%d: PID%d has not completed",
1474 svcID, svc_children[svcID].cs_pid);
1475 return 1;
1479 * Drop the request if it's an SVC_START and a child process is
1480 * still operating on the service
1482 if ((req == SVC_START) && svc_children[svcID].cs_pid) {
1483 clulog(LOG_DEBUG,
1484 "Dropping SVC_START for svc%d: PID%d has not completed",
1485 svcID, svc_children[svcID].cs_pid);
1486 return 1;
1489 return 0;
1494 * Determine the target node we should relocate the service to if we are
1495 * not given one from cluadmin. This checks the failover domain to see
1496 * the next node online in a given failover group.
1498 * @param rmask The nodes allowed to be checked for when we are
1499 * trying to determine who should start the service.
1500 * @param current_owner The current owner of the service, or the node
1501 * who is requesting the information. This is the
1502 * _last_ member allowed to run the service.
1503 * @param svcID ID of the service in question.
1506 best_target_node(memb_mask_t rmask, int current_owner, int svcID)
1508 int x;
1510 x = current_owner + 1;
1511 if (x >= MAX_NODES)
1512 x = 0;
1514 do {
1515 if (node_should_start(x, rmask, svcID) == FOD_BEST) {
1516 return x;
1519 x++;
1520 if (x >= MAX_NODES)
1521 x = 0;
1522 } while (x != current_owner);
1524 return current_owner;
1528 #if 0
1530 * clear_failure_mask(int svcID)
1532 * @param svcID
1533 * @see mark_self_failed
1536 clear_failure_mask(int svcID)
1538 ServiceBlock svcStatus;
1540 if (clu_svc_lock(svcID) == -1) {
1541 clulog(LOG_ERR, "Couldn't obtain lock for service %d: %s\n",
1542 svcID, strerror(errno));
1543 return FAIL;
1546 if (getServiceStatus(svcID, &svcStatus) != SUCCESS) {
1547 clulog(LOG_ERR, "Couldn't obtain status for service %d\n",
1548 svcID);
1549 clu_svc_unlock(svcID);
1550 return FAIL;
1553 memset(svcStatus.sb_failed_mask, 0, sizeof(svcStatus.sb_failed_mask));
1554 if (setServiceStatus(&svcStatus) != SUCCESS) {
1555 clulog(LOG_ERR, "Couldn't set FAILURE status for service %d\n",
1556 svcID);
1557 return FAIL;
1560 clu_svc_unlock(svcID);
1561 return SUCCESS;
1565 * Marks our bit in the failed_nodes bitmask in the service block on disk.
1566 * This is a signal to other members to _not_ send us the service again.
1567 * This mask is cleared when a service is successfully started.
1569 * @param svcID
1570 * @see clear_failure_mask
1573 mark_self_failed(int svcID)
1575 ServiceBlock svcStatus;
1577 if (clu_svc_lock(svcID) == -1) {
1578 clulog(LOG_ERR, "Couldn't obtain lock for service %d: %s\n",
1579 svcID, strerror(errno));
1580 return FAIL;
1583 if (getServiceStatus(svcID, &svcStatus) != SUCCESS) {
1584 clulog(LOG_ERR, "Couldn't obtain status for service %d\n",
1585 svcID);
1586 clu_svc_unlock(svcID);
1587 return FAIL;
1590 /* Mark ourselves as FAILED for this service */
1591 memb_mark_up(svcStatus.sb_failed_mask, myNodeID);
1593 if (setServiceStatus(&svcStatus) != SUCCESS) {
1594 clulog(LOG_ERR, "Couldn't set FAILURE status for service %d\n",
1595 svcID);
1596 return FAIL;
1599 clu_svc_unlock(svcID);
1601 return SUCCESS;
1603 #endif
1607 svc_report_failure(int svcID)
1609 ServiceBlock svcStatus;
1610 char *svcName;
1611 char *nodeName;
1613 getSvcName(svcID, &svcName);
1615 if (clu_svc_lock(svcID) == -1) {
1616 clulog(LOG_ERR, "Couldn't obtain lock for service %s: %s\n",
1617 svcName, strerror(errno));
1618 return FAIL;
1621 if (getServiceStatus(svcID, &svcStatus) != SUCCESS) {
1622 clulog(LOG_ERR, "Couldn't obtain status for service %s\n",
1623 svcName);
1624 clu_svc_unlock(svcID);
1625 return FAIL;
1627 clu_svc_unlock(svcID);
1629 getNodeName(svcStatus.sb_last_owner, &nodeName);
1631 clulog(LOG_ALERT,
1632 "Service %s returned failure code. Last Owner: %s\n",
1633 svcName, nodeName);
1634 clulog(LOG_ALERT,
1635 "Administrator intervention required.\n");
1638 return SUCCESS;
1642 * handle_relocate_req - Relocate a service. This seems like a huge
1643 * deal, except it really isn't.
1645 * @param svcID Service ID in question.
1646 * @param flags If (flags & SVCF_PENDING), we were called from
1647 * handle_start_req - and so we should ignore all local
1648 * restarts/stops - since handle_start_req does this
1649 * for us.
1650 * @param preferred_target When sent a relocate message from the
1651 * management software, a destination node
1652 * is sent as well. This causes us to try
1653 * starting the service on that node *first*,
1654 * but does NOT GUARANTEE that the service
1655 * will end up on that node. It will end up
1656 * on whatever node actually successfully
1657 * starts it.
1658 * @param new_owner Member who actually ends up owning the service.
1661 handle_relocate_req(int svcID, int flags, int preferred_target,
1662 uint32_t *new_owner)
1664 memb_mask_t allowed_nodes;
1665 int target = preferred_target;
1666 int request;
1667 char *nodeName=NULL, *svcName=NULL;
1669 getSvcName(svcID, &svcName);
1670 request = (flags & SVCF_PENDING) ? SVC_START_PENDING :
1671 SVC_START_RELOCATE;
1674 * Stop the service - if we haven't already done so.
1676 if (!(flags & SVCF_PENDING)) {
1677 if (svc_stop(svcID, flags) != SUCCESS) {
1678 if (svc_start(svcID, flags) != SUCCESS)
1679 svc_fail(svcID);
1680 return FAIL;
1685 * First, see if it's legal to relocate to the target node. Legal
1686 * means: the node is online and is in the [restricted] failover
1687 * domain of the service, or the service has no failover domain.
1689 if (preferred_target >= 0 && preferred_target <= MAX_NODES) {
1691 memset(allowed_nodes, 0, sizeof(allowed_nodes));
1692 memb_mark_up(allowed_nodes, preferred_target);
1693 target = best_target_node(allowed_nodes, myNodeID, svcID);
1696 * I am the ONLY one capable of running this service,
1697 * PERIOD...
1699 if (target == myNodeID)
1700 goto exhausted;
1702 if (target == preferred_target) {
1704 * It's legal to start the service on the given
1705 * node. Try to do so.
1707 if (relocate_service(svcID, request, target) ==
1708 SUCCESS) {
1709 *new_owner = target;
1711 * Great! We're done...
1713 return SUCCESS;
1719 * Ok, so, we failed to send it to the preferred target node.
1720 * Try to start it on all other nodes.
1722 memcpy(allowed_nodes, membership, sizeof(memb_mask_t));
1723 memb_mark_down(allowed_nodes, myNodeID);
1725 /* Don't try to relocate to the preferred target more than once. */
1726 if (preferred_target >= 0 && preferred_target <= MAX_NODES)
1727 memb_mark_down(allowed_nodes, preferred_target);
1729 while (memb_count(allowed_nodes)) {
1730 target = best_target_node(allowed_nodes, myNodeID, svcID);
1731 if (target == myNodeID)
1732 goto exhausted;
1734 switch (relocate_service(svcID, request, target)) {
1735 case FAIL:
1736 memb_mark_down(allowed_nodes, target);
1737 continue;
1738 case ABORT:
1739 svc_report_failure(svcID);
1740 return FAIL;
1741 case SUCCESS:
1742 *new_owner = target;
1743 getNodeName(target, &nodeName);
1744 clulog(LOG_NOTICE,
1745 "Service %s now running on member %s\n",
1746 svcName, nodeName);
1747 return SUCCESS;
1748 default:
1749 clulog(LOG_ERR, "Invalid reply from member %d during"
1750 " relocate operation!\n", target);
1755 * We got sent here from handle_start_req.
1756 * We're DONE.
1758 if (flags & SVCF_PENDING)
1759 return FAIL;
1762 * All potential places for the service to start have been exhausted.
1763 * We're done.
1765 exhausted:
1766 clulog(LOG_WARNING, "Attempting to restart service %s locally.\n",
1767 svcName);
1768 if (svc_start(svcID, flags) == SUCCESS) {
1769 *new_owner = myNodeID;
1770 return FAIL;
1773 if (svc_stop(svcID, 0) != SUCCESS) {
1774 svc_fail(svcID);
1775 svc_report_failure(svcID);
1778 return FAIL;
1783 * handle_start_req - Handle a generic start request from a user or during
1784 * service manager boot.
1786 * @param svcID Service ID to start.
1787 * @param flags
1788 * @param new_owner Owner which actually started the service.
1789 * @return FAIL - Failure.
1790 * SUCCESS - The service is running.
1793 handle_start_req(int svcID, int flags, uint32_t *new_owner)
1795 int ret, tolerance = FOD_BEST, target = -1;
1798 * When a service request is from a user application (eg, clusvcadm),
1799 * accept FOD_GOOD instead of FOD_BEST
1801 if (flags & SVCF_START_DISABLED)
1802 tolerance = FOD_GOOD;
1804 if (!(flags & SVCF_RESTART) &&
1805 (node_should_start(myNodeID, membership, svcID) < tolerance)) {
1807 /* Try to send to someone else who might care about it */
1808 target = best_target_node(membership, myNodeID, svcID);
1809 ret = handle_relocate_req(svcID, SVCF_PENDING, target,
1810 new_owner);
1812 if (ret == FAIL)
1813 svc_disable(svcID);
1814 return ret;
1818 * Strip out all flags which are invalid.
1820 clulog(LOG_DEBUG, "Starting service %d - flags 0x%08x\n", svcID,
1821 flags);
1823 #if 0
1825 * This is a 'root' start request. We need to clear out our failure
1826 * mask here - so that we can try all nodes if necessary.
1828 flags |= SVCF_CLEAR_FAILURES;
1829 #endif
1830 ret = svc_start(svcID, flags);
1832 #if 0
1833 if (clear_failure_mask(svcID) != SUCCESS) {
1834 clulog(LOG_WARNING, "Could not clear failure bitmask for "
1835 "service #%s!\n", svcName);
1837 #endif
1840 * If we succeeded, then we're done.
1842 if (ret == SUCCESS) {
1843 *new_owner = myNodeID;
1844 return SUCCESS;
1848 * Keep the state open so the other nodes don't try to start
1849 * it. This allows us to be the 'root' of a given service.
1851 clulog(LOG_DEBUG, "Stopping failed service %d\n", svcID);
1852 if (svc_stop(svcID, SVCF_PENDING) != SUCCESS) {
1853 clulog(LOG_CRIT, "Service %d failed to stop cleanly", svcID);
1854 svc_fail(svcID);
1857 * If we failed to stop the service, we're done. At this
1858 * point, we can't determine the service's status - so
1859 * trying to start it on other nodes is right out.
1861 return ABORT;
1865 * OK, it failed to start - but succeeded to stop. Now,
1866 * we should relocate the service.
1868 clulog(LOG_WARNING, "Relocating failed service %d\n", svcID);
1869 ret = handle_relocate_req(svcID, SVCF_PENDING, -1, new_owner);
1871 if (ret == FAIL)
1872 svc_disable(svcID);
1874 return ret;
1879 * handle_start_remote_req - Handle a remote start request.
1881 * @param svcID Service ID to start.
1882 * @param flags Flags to use to determine start behavior.
1883 * @return FAIL - Local failure. ABORT - Unrecoverable error:
1884 * the service didn't start, nor stop cleanly. SUCCESS
1885 * - We started the service.
1888 handle_start_remote_req(int svcID, int flags)
1890 memb_mask_t rmask;
1891 int tolerance = FOD_BEST;
1893 memset(rmask, 0, sizeof(rmask));
1894 memb_mark_up(rmask, myNodeID);
1896 if (flags & SVCF_START_DISABLED)
1897 tolerance = FOD_GOOD;
1900 * See if we agree with our ability to start the given service.
1902 if (node_should_start(myNodeID, rmask, svcID) < tolerance)
1903 return FAIL;
1905 if (svc_start(svcID, flags) == SUCCESS)
1906 return SUCCESS;
1908 #if 0
1909 if (mark_self_failed(svcID) == FAIL) {
1910 svc_fail(svcID);
1911 return ABORT;
1913 #endif
1915 if (svc_stop(svcID, 0) == SUCCESS)
1916 return FAIL;
1918 svc_fail(svcID);
1919 return ABORT;
1924 * Handle a request regarding a service.
1926 * @param svcID ID of service in question.
1927 * @param action Action to be performed on the service.
1928 * @param target In the case of a relocate, target/destination node
1929 * we're relocating to.
1930 * @param fd File descriptor on which we send our response.
1932 void
1933 handle_svc_request(int svcID, int action, int target, msg_handle_t fd)
1935 char *svcName;
1936 SmMessageSt msg_sm;
1937 int ret = FAIL;
1938 ServiceBlock svcStatus;
1939 int flags = 0;
1940 uint32_t new_owner = NODE_ID_NONE;
1941 char child = 0;
1943 getSvcName(svcID, &svcName);
1944 clulog(LOG_DEBUG, "Service %s request %d\n", svcName, action);
1946 if (myNodeState != NODE_UP)
1947 goto out;
1950 * Don't assume the service exists...
1952 if (serviceExists(svcID) != YES) {
1953 goto out;
1956 if (clu_svc_lock(svcID) == -1) {
1957 clulog(LOG_ERR, "Unable to obtain cluster lock: %s\n",
1958 strerror(errno));
1959 goto out;
1962 if (getServiceStatus(svcID, &svcStatus) != SUCCESS) {
1963 clulog(LOG_ERR, "Cannot get status for service %d\n", svcID);
1964 clu_svc_unlock(svcID);
1965 goto out;
1968 clu_svc_unlock(svcID);
1971 * Check to see if we should drop the service request. This
1972 * is based on the current service status, the action required,
1973 * etc.
1975 if (svc_advise_drop_request(svcID, action, &svcStatus)) {
1976 ret = SUCCESS;
1977 goto out;
1981 * Fork so that we can run service actions in parallel.
1983 while (svc_children[svcID].cs_pid != 0) {
1985 if (svcStatus.sb_state == SVC_PENDING) {
1987 * Shouldn't get here, but if so, avoid deadlock.
1989 clulog(LOG_ERR, "%s failed during "
1990 "relocate-to-preferred member operation",
1991 svcName);
1992 ret = FAIL;
1993 goto out;
1996 clulog(LOG_DEBUG,
1997 "Proc %d already running action on service %s\n",
1998 svc_children[svcID].cs_pid, svcName);
2000 /* See if we missed the SIGCHLD */
2001 if (!cleanup_child(svcID))
2002 sleep(5);
2005 /* Record what the child will be doing */
2006 svc_children[svcID].cs_rq = action;
2008 if ((svc_children[svcID].cs_pid = fork())) {
2009 if (svc_children[svcID].cs_pid < 0) {
2011 * Fork failed.
2013 clulog(LOG_DEBUG,
2014 "Fork failed handling action request.\n");
2015 svc_children[svcID].cs_pid = 0;
2016 svc_children[svcID].cs_rq = 0;
2018 /* Send reply, if applicable */
2019 goto out;
2022 clulog(LOG_DEBUG, "[M] Pid %d -> %s for service %s\n",
2023 svc_children[svcID].cs_pid, serviceActionStrings[action],
2024 svcName);
2026 return;
2029 block_signal(SIGTERM);
2030 block_signal(SIGHUP);
2031 child = 1;
2032 clulog(LOG_DEBUG, "[C] Pid %d handling %s request for service %s\n",
2033 getpid(), serviceActionStrings[action], svcName);
2035 switch (action) {
2036 case SVC_START:
2037 start_top:
2038 flags |= ((fd == -1) ? 0 : SVCF_START_DISABLED);
2040 ret = handle_start_req(svcID, flags, &new_owner);
2041 break;
2043 case SVC_START_PENDING:
2045 * We allow starting of pending requests only if
2046 * explicitly asked for from someone else - never on
2047 * a local node event.
2049 flags = SVCF_PENDING;
2050 case SVC_START_RELOCATE:
2052 * We use fd as an indicator to see whether or not we
2053 * were called on behalf of a node event. Generally,
2054 * fd is set, but we usually don't handle relocation of
2055 * disabled services -- it's kind of an anomaly.
2057 flags |= (fd == -1) ? 0 : SVCF_START_DISABLED;
2058 ret = handle_start_remote_req(svcID, flags);
2059 break;
2061 case SVC_STOP:
2062 case SVC_RESTART:
2063 restart_top:
2064 if ((ret = svc_stop(svcID, 0)) == SUCCESS) {
2066 * Ok, we did the stop - now do the whole start
2067 * process, including relocating in the case of
2068 * failure.
2070 if (action == SVC_RESTART) {
2071 target = myNodeID;
2072 flags = SVCF_RESTART;
2073 goto start_top;
2076 break;
2079 ret = FAIL;
2081 if (svc_start(svcID, 0) == SUCCESS)
2082 break;
2084 svc_fail(svcID);
2085 break;
2087 case SVC_DISABLE:
2089 if ((ret = svc_disable(svcID)) == SUCCESS)
2090 break;
2093 * We don't run svc_fail here because svc_fail could
2094 * put us back where we were. Always allow disable.
2096 ret = FAIL;
2097 break;
2099 case SVC_CHECK:
2101 if ((ret = svc_check(svcID)) == SUCCESS)
2102 break;
2104 if (ret == ABORT) {
2105 /* Try to relocate service at this point */
2106 ret = handle_relocate_req(svcID, 0, -1, &new_owner);
2107 break;
2110 ret = FAIL;
2112 clulog(LOG_WARNING, "Restarting locally failed service %s\n",
2113 svcName);
2115 (void) svc_stop(svcID, 0);
2118 * Try the whole start process, including relocating it in
2119 * the case that it failed to restart locally.
2121 flags = SVCF_RESTART | SVCF_RESTARTFAILED;
2122 goto start_top;
2124 case SVC_RELOCATE:
2126 if (svcStatus.sb_state == SVC_DISABLED) {
2127 clulog(LOG_DEBUG,
2128 "Can not relocate disabled service %s\n",
2129 svcName);
2130 ret = FAIL;
2131 break;
2134 if (target == myNodeID)
2135 goto restart_top;
2137 ret = handle_relocate_req(svcID, 0, target, &new_owner);
2138 break;
2140 default:
2141 clulog(LOG_ERR, "Invalid service request %d\n", action);
2142 ret = FAIL;
2143 break;
2147 * If fd is valid, the request was on behalf of a client who is
2148 * blocking for the status reply.
2150 out:
2151 if (fd != -1) {
2152 msg_sm.sm_data.d_svcOwner = new_owner;
2153 msg_sm.sm_data.d_ret = ret;
2155 /* Encode before responding... */
2156 swab_SmMessageSt(&msg_sm);
2158 if (msg_send(fd, &msg_sm, sizeof (SmMessageSt)) !=
2159 sizeof (SmMessageSt)) {
2160 clulog(LOG_ERR, "Error replying to action request.\n");
2164 if (child)
2165 exit(ret); /* child exit */
2170 * Check to see if we need to kill a child process - and do so if necessary.
2171 * We do not need to reset the cs_pid field. This should only be called
2172 * during a remote node-down event to determine if we had a relocate-request
2173 * or other request out to that node. If so, we need to kill the child
2174 * handling that request.
2176 * @param svcID Service ID
2177 * @param svc Service block (status of svcID)
2179 void
2180 consider_reapage(int svcID, ServiceBlock * svc)
2183 * Since PENDING is only a valid state when BOTH nodes are up, and
2184 * given that the remote node just died, mark the service as
2185 * 'stopped' if it was in the 'pending' state. Kill the child
2186 * process if it exists.
2188 if (svc->sb_state == SVC_PENDING) {
2189 if (svc_children[svcID].cs_pid)
2190 kill(svc_children[svcID].cs_pid, SIGKILL);
2192 clulog(LOG_DEBUG, "Marking %d (state %d) as stopped", svcID,
2193 svc->sb_state);
2195 /* Mark state -> stopped */
2196 if (clu_svc_lock(svcID) == -1) {
2197 clulog(LOG_ERR, "Unable to obtain cluster lock: %s\n",
2198 strerror(errno));
2199 return;
2202 if (getServiceStatus(svcID, svc) == SUCCESS) {
2203 svc->sb_last_owner = svc->sb_owner;
2204 svc->sb_owner = NODE_ID_NONE;
2205 svc->sb_state = SVC_STOPPED;
2206 if (setServiceStatus(svc) != SUCCESS) {
2207 clulog(LOG_ERR,
2208 "Failed marking service %d as stopped\n",
2209 svcID);
2212 clu_svc_unlock(svcID);
2213 return;
2216 if (!svc_children[svcID].cs_pid)
2217 return;
2220 * The child was SVC_START and the other node is marked as the owner.
2221 * This means we tried to start it locally, failed, and sent a
2222 * REMOTE_START to the other node, but the other node died before we
2223 * received a response.
2225 * Simplify: Kill child whenever our partner owns the service.
2227 if (svc->sb_owner != myNodeID) {
2228 clulog(LOG_INFO,
2229 "Killing child PID%d: Remote member went down!",
2230 svc_children[svcID].cs_pid);
2231 kill(svc_children[svcID].cs_pid, SIGKILL);
2232 return;
2236 * Our last case is an explicit relocate (eg, from cluadmin). The other
2237 * node went down, and we received its node-down event. This could have
2238 * been taken care of above, but we still need to catch the cases where
2239 * it hasn't been taken care of yet...
2241 if (svc_children[svcID].cs_rq == SVC_RELOCATE) {
2242 clulog(LOG_INFO,
2243 "Killing child PID%d: Remote member went down!",
2244 svc_children[svcID].cs_pid);
2245 kill(svc_children[svcID].cs_pid, SIGKILL);
2251 * Rewrite a service block as 'stopped' if all members of its
2252 * restricted failover domain went offline.
2254 * @param svcID Service ID to stop.
2255 * @return FAIL, SUCCESS
2257 static int
2258 check_rdomain_crash(int svcID, ServiceBlock *svcStatus)
2260 char *svcName;
2261 memb_mask_t allowed_nodes;
2262 getSvcName(svcID, &svcName);
2263 if (memb_online(membership, svcStatus->sb_owner) ||
2264 (svcStatus->sb_state == SVC_STOPPED))
2265 return SUCCESS;
2267 memcpy(allowed_nodes, membership, sizeof(memb_mask_t));
2268 memb_mark_down(allowed_nodes, svcStatus->sb_owner);
2269 if (best_target_node(allowed_nodes, svcStatus->sb_owner, svcID) !=
2270 svcStatus->sb_owner)
2271 return SUCCESS;
2273 if (clu_svc_lock(svcID) == -1) {
2274 clulog(LOG_ERR, "Unable to obtain cluster lock: %s\n",
2275 strerror(errno));
2276 return FAIL;
2279 if (getServiceStatus(svcID, svcStatus) != SUCCESS) {
2280 clu_svc_unlock(svcID);
2281 clulog(LOG_ERR, "Failed getting status for service %s\n",
2282 svcName);
2283 return FAIL;
2286 if ((svcStatus->sb_state != SVC_STARTED) ||
2287 (svcStatus->sb_owner == myNodeID) ||
2288 memb_online(membership, svcStatus->sb_owner)) {
2289 clu_svc_unlock(svcID);
2290 return SUCCESS;
2293 svcStatus->sb_last_owner = svcStatus->sb_owner;
2294 svcStatus->sb_owner = NODE_ID_NONE;
2295 svcStatus->sb_state = SVC_STOPPED;
2296 svcStatus->sb_transition = (uint64_t)time(NULL);
2297 if (setServiceStatus(svcStatus) != SUCCESS) {
2298 clu_svc_unlock(svcID);
2299 clulog(LOG_ERR, "Failed changing service status\n");
2300 return FAIL;
2302 clu_svc_unlock(svcID);
2303 return SUCCESS;
2308 * Called to decide what services to start locally during a node_event.
2309 * Originally a part of node_event, it is now its own function to cut down
2310 * on the length of node_event.
2312 * @see node_event
2314 void
2315 eval_services(int local, int nodeStatus)
2317 int svcID;
2318 char *svcName, *nodeName;
2319 ServiceBlock svcStatus;
2321 if (services_locked)
2322 return;
2324 for (svcID = 0; svcID < MAX_SERVICES; svcID++) {
2326 if (serviceExists(svcID) != YES)
2327 continue;
2329 getSvcName(svcID, &svcName);
2332 * Lock the service information and get the current service
2333 * status.
2335 if (clu_svc_lock(svcID) == -1) {
2336 clulog(LOG_ERR, "Unable to obtain cluster lock: %s\n",
2337 strerror(errno));
2338 return;
2341 if (getServiceStatus(svcID, &svcStatus) != SUCCESS) {
2342 clulog(LOG_ERR, "Cannot get status for service %s\n",
2343 svcName);
2344 clu_svc_unlock(svcID);
2345 continue;
2347 clu_svc_unlock(svcID);
2349 if (svcStatus.sb_owner == NODE_ID_NONE)
2350 nodeName = "none";
2351 else
2352 getNodeName(svcStatus.sb_owner, &nodeName);
2354 if ((svcStatus.sb_state == SVC_DISABLED) ||
2355 (svcStatus.sb_state == SVC_FAILED))
2356 continue;
2358 clulog(LOG_DEBUG, "Evaluating service %s, state %s, owner "
2359 "%s\n", svcName,
2360 serviceStateStrings[svcStatus.sb_state], nodeName);
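/*
 * Roughly, the two cases handled below are:
 *  - the local member just came up: start whatever we are the best
 *    failover-domain candidate for;
 *  - a remote member went down: reap any child working on one of its
 *    services, take the service over if we are the best candidate, or
 *    mark it stopped if its restricted domain died with it.
 */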
2362 if (local && (nodeStatus == NODE_UP)) {
2364 /*
2365 * Start any stopped services, or started services
2366 * that are owned by a down node.
2367 */
2368 if (node_should_start(myNodeID, membership, svcID) ==
2369 FOD_BEST)
2370 handle_svc_request(svcID, SVC_START, -1, -1);
2372 continue;
2373 }
2375 if (!local && (nodeStatus == NODE_DOWN)) {
2377 /*
2378 * Take over services owned by the downed member if we
2379 * are the best candidate to run them.
2380 */
2381 consider_reapage(svcID, &svcStatus);
2382 if (node_should_start(myNodeID, membership, svcID) ==
2383 FOD_BEST)
2384 handle_svc_request(svcID, SVC_START, -1, -1);
2385 else
2386 check_rdomain_crash(svcID, &svcStatus);
2387 /*
2388 * TODO
2389 * Mark a service as 'stopped' if no members in its restricted
2390 * fail-over domain are running.
2391 */
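/*
 * The restricted-domain case described in this TODO appears to be what
 * check_rdomain_crash(), called just above, already handles, so the
 * TODO may simply be stale.
 */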
2392 }
2393 }
2394 }
2397 /**
2398 * Called to handle the transition of a cluster member from up->down or
2399 * down->up. This handles initializing services (in the local node-up case),
2400 * exiting due to loss of quorum (local node-down), and service fail-over
2401 * (remote node down).
2403 * @param nodeID ID of the member which has come up/gone down.
2404 * @param nodeStatus New state of the member in question.
2405 * @see eval_services
2406 */
2407 void
2408 node_event(int nodeID, int nodeStatus)
2409 {
2410 int local = 0;
2411 int partner;
2413 local = (nodeID == myNodeID);
2414 if (local) {
2415 if (nodeStatus == NODE_UP) {
2417 if (myNodeState == NODE_UP)
2418 return;
2420 myNodeState = NODE_UP;
2422 #ifdef OLD_CLU_ALIAS
2423 clu_alias(0);
2424 #endif
2426 clulog(LOG_DEBUG,
2427 "local member up, initializing services\n");
2429 /*
2430 * Initialize all services we own. We needed to wait
2431 * for a NODE_UP event as we need the locking
2432 * subsystem for this.
2433 */
2435 if (init_services() != SUCCESS) {
2436 clulog(LOG_ERR, "Cannot initialize services\n");
2437 svcmgr_exit(1, 0);
2438 }
2439 }
2441 if (nodeStatus == NODE_DOWN) {
2442 svcmgr_exit(0, 0);
2443 /* NOT REACHED */
2444 }
2445 } else {
2447 /*
2448 * Nothing to do for events from other nodes if we are not up.
2449 */
2451 if (myNodeState != NODE_UP)
2452 return;
2453 }
2455 #ifdef OLD_CLU_ALIAS
2456 if (myNodeID == memb_high_node(membership)) {
2457 clu_alias(1);
2458 } else {
2459 clu_alias(0);
2460 }
2461 #endif
2463 eval_services(local, nodeStatus);
2465 /* If we just came up, and our partner is up request a failback */
2466 if (local && (nodeStatus == NODE_UP)) {
2468 for (partner = 0; partner < MAX_NODES; partner++) {
2469 if (partner == myNodeID)
2470 continue;
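/*
 * request_failback() presumably asks the online partner to hand back
 * any services that now prefer us; on the receiving side this shows up
 * as an SVC_FAILBACK action, handled in dispatch_msg() via failback().
 */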
2472 if (memb_online(membership, partner)) {
2473 if (request_failback(partner) != SUCCESS) {
2474 clulog(LOG_ERR,
2475 "Unable to inform partner "
2476 "to start failback\n");
2484 /**
2485 * Run service status scripts on all services which (a) we are running and
2486 * (b) have check intervals set.
2487 *
2488 * @param elapsed Number of elapsed seconds since last time
2489 * check_services was run.
2490 */
2491 void
2492 check_services(int elapsed)
2493 {
2494 int svcID;
2495 char *svcName;
2496 ServiceBlock svcStatus;
2497 char *intervalStr;
2498 int interval;
2500 for (svcID = 0; svcID < MAX_SERVICES; svcID++) {
2502 if (serviceExists(svcID) != YES)
2503 continue;
2505 getSvcName(svcID, &svcName);
2507 /*
2508 * Check service interval first, since it doesn't
2509 * require a lock.
2510 */
2511 if (getSvcCheckInterval(svcID, &intervalStr) == SUCCESS)
2512 interval = atoi(intervalStr);
2513 else
2514 interval = 0;
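/*
 * Note: a missing or non-numeric check interval ends up as 0 here
 * (atoi() returns 0 in that case), which disables status checks for
 * this service entirely.
 */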
2516 if (!interval)
2517 continue;
2519 /*
2520 * Check service status
2521 */
2522 if (clu_svc_lock(svcID) == -1) {
2523 clulog(LOG_ERR, "Unable to obtain cluster lock: %s\n",
2524 strerror(errno));
2525 return;
2526 }
2528 if (getServiceStatus(svcID, &svcStatus) != SUCCESS) {
2529 clu_svc_unlock(svcID);
2530 clulog(LOG_ERR,
2531 "Failed getting status for service %s\n",
2532 svcName);
2533 continue;
2534 }
2535 clu_svc_unlock(svcID);
2537 if ((svcStatus.sb_owner != myNodeID)
2538 || (svcStatus.sb_state != SVC_STARTED))
2539 continue;
2541 ticks[svcID] += elapsed;
2543 clulog(LOG_DEBUG,
2544 "Check interval for service %s is %d, elapsed %d\n",
2545 svcName, interval, ticks[svcID]);
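/*
 * check_services() itself only runs about once every CHECK_INTERVAL
 * seconds (driven from the main loop), so a configured interval of,
 * say, 30 fires the status check on the first pass where the
 * accumulated ticks reach 30, i.e. roughly every 30 to 35 seconds
 * rather than exactly every 30.
 */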
2547 if (ticks[svcID] < interval) {
2548 clulog(LOG_DEBUG, "Too early to check service %s\n",
2549 svcName);
2550 continue;
2551 }
2553 ticks[svcID] = 0;
2554 handle_svc_request(svcID, SVC_CHECK, -1, -1);
2555 }
2556 }
2559 /**
2560 * Handle a QUORUM or QUORUM_GAINED message from the quorum daemon. This
2561 * updates our local membership view, handles whether or not we should
2562 * exit, and determines node transitions (thus calling node_event()).
2563 *
2564 * @param msg_quorum Cluster event from the quorum daemon.
2565 * @see node_event
2566 * @return 0
2567 */
2568 int
2569 handle_quorum_msg(cm_event_t *msg_quorum)
2570 {
2571 memb_mask_t node_delta, old_membership;
2572 int x;
2573 char *nodeName;
2574 int me = 0;
2576 memcpy(old_membership, membership, sizeof(memb_mask_t));
2577 memcpy(membership, cm_quorum_mask(msg_quorum), sizeof(memb_mask_t));
2579 lock_set_quorum_view(cm_quorum_view(msg_quorum));
2581 clulog(LOG_INFO, "Quorum Event: View #%d %s\n",
2582 (int)cm_quorum_view(msg_quorum),
2583 memb_mask_str(cm_quorum_mask(msg_quorum)));
2585 /*
2586 * Handle nodes lost. Do our local node event first.
2587 */
2588 memb_mask_lost(node_delta, old_membership, membership);
2590 me = memb_online(node_delta, myNodeID);
2591 if (me) {
2592 /* Should not happen */
2593 clulog(LOG_INFO, "State change: LOCAL OFFLINE\n");
2594 node_event(myNodeID, NODE_DOWN);
2595 /* NOTREACHED */
2596 }
2598 for (x=0; x<MAX_NODES; x++) {
2599 if (x == myNodeID)
2600 continue;
2601 /*
2602 * If a member loses its panic status and is not online, take
2603 * over its services. That is, someone has decided *for sure*
2604 * that said member is DOWN, so its state is no longer
2605 * unknown (e.g., the disk tiebreaker lost quorum...).
2606 */
2607 getNodeName(x, &nodeName);
2609 if (memb_online(mask_panic, x) &&
2610 !memb_online(cm_quorum_mask_panic(msg_quorum),x) &&
2611 !memb_online(cm_quorum_mask(msg_quorum),x)) {
2612 memb_mark_down(mask_panic, x);
2613 node_event(x, NODE_DOWN);
2614 clulog(LOG_INFO, "State change: %s DOWN\n",
2615 nodeName);
2616 continue;
2617 }
2619 if (!memb_online(node_delta, x))
2620 continue;
2622 if (memb_online(cm_quorum_mask_panic(msg_quorum), x)) {
2623 clulog(LOG_WARNING, "Member %s's state is uncertain: "
2624 "Some services may be unavailable!",
2625 nodeName);
2626 continue;
2627 }
2629 node_event(x, NODE_DOWN);
2630 clulog(LOG_INFO, "State change: %s DOWN\n",
2631 nodeName);
2632 }
2634 /*
2635 * Store our panic nodemask.
2636 */
2637 memcpy(mask_panic, cm_quorum_mask_panic(msg_quorum),
2638 sizeof(memb_mask_t));
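/*
 * mask_panic remembers which members the quorum daemon reported as
 * "uncertain" on the previous event; as checked above, such a member
 * is only treated as DOWN once it has left both the panic mask and the
 * regular membership mask.
 */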
2640 /*
2641 * Handle nodes gained. Do our local node event first.
2642 */
2643 me = memb_mask_gained(node_delta, old_membership, membership);
2644 if (me) {
2645 clulog(LOG_INFO, "State change: Local UP\n");
2646 node_event(myNodeID, NODE_UP);
2647 }
2649 for (x=0; x<MAX_NODES; x++) {
2650 if (!memb_online(node_delta, x))
2651 continue;
2653 if (x == myNodeID)
2654 continue;
2656 node_event(x, NODE_UP);
2657 getNodeName(x, &nodeName);
2658 clulog(LOG_INFO, "State change: %s UP\n",
2659 nodeName);
2660 }
2662 return 0;
2663 }
2666 /**
2667 * Read a message on a file descriptor (the one which is connected to
2668 * the quorum daemon) and process it accordingly.
2669 *
2670 * @param fd File descriptor connected to the quorum daemon.
2671 * @return FAIL - no message waiting/empty message,
2672 * SUCCESS - successfully handled message.
2673 * @see dispatch_msg
2674 */
2675 int
2676 quorum_msg(msg_handle_t fd)
2677 {
2678 cm_event_t *msg_quorum;
2680 msg_quorum = cm_ev_read(fd);
2682 if (!msg_quorum)
2683 return FAIL;
2685 switch (cm_ev_event(msg_quorum)) {
2686 case EV_QUORUM_LOST:
2687 clulog(LOG_CRIT,"Halting services due to loss of quorum\n");
2688 svcmgr_exit(1, 0);
2689 /* NOT REACHED */
2690 break;
2692 case EV_QUORUM:
2693 case EV_QUORUM_GAINED:
2694 handle_quorum_msg(msg_quorum);
2695 break;
2697 case EV_NO_QUORUM:
2698 /* idle(); */
2699 break;
2701 default:
2702 clulog(LOG_DEBUG, "unhandled message request %d\n",
2703 cm_ev_event(msg_quorum));
2704 break;
2705 }
2707 cm_ev_free(msg_quorum);
2708 return SUCCESS;
2709 }
2712 /**
2713 * Receive and process a message on a file descriptor and decide what to
2714 * do with it. This function doesn't handle messages from the quorum daemon.
2715 *
2716 * @param fd File descriptor with a waiting message.
2717 * @return FAIL - failed to receive/handle message, or invalid
2718 * data received. SUCCESS - handled message successfully.
2719 * @see quorum_msg
2720 */
2721 int
2722 dispatch_msg(msg_handle_t fd)
2723 {
2724 int ret;
2725 generic_msg_hdr msg_hdr;
2726 SmMessageSt msg_sm;
2728 /* Peek-a-boo */
2729 ret = msg_peek(fd, &msg_hdr, sizeof(msg_hdr));
2730 if (ret != sizeof (generic_msg_hdr)) {
2731 clulog(LOG_ERR, "error receiving message header\n");
2732 return FAIL;
2733 }
2735 /* Decode the header */
2736 swab_generic_msg_hdr(&msg_hdr);
2737 if ((msg_hdr.gh_magic != GENERIC_HDR_MAGIC)) {
2738 clulog(LOG_ERR, "Invalid magic: Wanted 0x%08x, got 0x%08x\n",
2739 GENERIC_HDR_MAGIC, msg_hdr.gh_magic);
2740 return FAIL;
2741 }
2743 clulog(LOG_DEBUG, "received message, fd %d\n", fd);
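/*
 * Only the generic header has been peeked at so far; for commands that
 * carry a body (SVC_ACTION_REQUEST below), the full SmMessageSt is read
 * afterwards with msg_receive_timeout() once gh_command is known.
 */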
2745 switch (msg_hdr.gh_command) {
2746 case SVC_CONFIG_UPDATE:
2747 clulog(LOG_INFO, "Rereading configuration...\n");
2749 rebuild_config_lockless();
2751 msg_svc_init(1);
2752 set_loglevel();
2753 notify_everybody();
2754 break;
2756 case SVC_LOCK:
2757 clulog(LOG_NOTICE, "Service states locked\n");
2758 services_locked = 1;
2759 break;
2761 case SVC_UNLOCK:
2762 clulog(LOG_NOTICE, "Service states unlocked\n");
2763 services_locked = 0;
2764 break;
2766 case SVC_QUERY_LOCK:
2767 msg_send_simple(fd, services_locked?SVC_LOCK:SVC_UNLOCK, 0, 0);
2768 break;
2770 case SVC_ACTION_REQUEST:
2772 ret = msg_receive_timeout(fd, &msg_sm, sizeof(msg_sm),
2773 MSG_TIMEOUT);
2774 if (ret != sizeof(msg_sm)) {
2775 clulog(LOG_ERR, "receiving message data from client "
2776 "error: %d\n", ret);
2777 return FAIL;
2778 }
2780 /* Decode SmMessageSt message */
2781 swab_SmMessageSt(&msg_sm);
2783 if (services_locked) {
2784 msg_sm.sm_data.d_ret = FAIL;
2785 /* Encode before responding... */
2786 swab_SmMessageSt(&msg_sm);
2788 if (msg_send(fd, &msg_sm, sizeof (SmMessageSt)) !=
2789 sizeof (SmMessageSt))
2790 clulog(LOG_ERR,
2791 "Error replying to action request.\n");
2793 break;
2794 }
2796 if (msg_sm.sm_data.d_action == SVC_FAILBACK) {
2797 failback(msg_sm.sm_data.d_svcOwner);
2798 break;
2799 }
2801 handle_svc_request(msg_sm.sm_data.d_svcID,
2802 msg_sm.sm_data.d_action,
2803 msg_sm.sm_data.d_svcOwner, fd);
2804 break;
2806 default:
2807 clulog(LOG_DEBUG, "unhandled message request %d\n",
2808 msg_hdr.gh_command);
2809 break;
2810 }
2811 return SUCCESS;
2812 }
2815 int
2816 main(int argc, char **argv)
2817 {
2818 struct timeval timeout, tv1, tv2;
2819 int elapsed_secs;
2820 int check_period = 0;
2821 msg_handle_t fd;
2822 int i;
2823 msg_handle_t listen_fd, quorum_fd;
2824 sigset_t set;
2825 fd_set rfds;
2826 extern char *optarg;
2827 int foreground = 0, debug = 0, opt, retries = 0;
2829 while ((opt = getopt(argc, argv, "fd")) != EOF) {
2830 switch (opt) {
2831 case 'd':
2832 debug = 1;
2833 break;
2834 case 'f':
2835 foreground = 1;
2836 default:
2837 break;
2838 }
2839 }
2841 if (!debug)
2842 (void) clu_set_loglevel(LOG_INFO);
2843 else
2844 (void) clu_set_loglevel(LOG_DEBUG);
2846 if (!foreground)
2847 daemon_init(argv[0]);
2848 else
2849 clu_log_console(1);
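/*
 * Usage sketch (based on the getopt string "fd" above): the daemon is
 * normally spawned by the quorum daemon, but for interactive debugging
 * one might run it by hand as, e.g.:
 *
 *      clusvcmgrd -f -d
 *
 * where -f keeps it in the foreground (console logging) and -d raises
 * the log level to LOG_DEBUG.
 */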
2851 /*
2852 * Generally, you do this when you know you have quorum.
2853 * However, the service manager simply doesn't get here without
2854 * quorum... (The quorum daemon spawns it when it achieves quorum)
2855 */
2856 shared_storage_init();
2857 switch(boot_config_init()) {
2858 case -1:
2859 clulog(LOG_CRIT, "Configuration invalid!\n");
2860 return -1;
2861 case 1:
2862 notify_everybody();
2863 break;
2864 case 0:
2865 default:
2866 break;
2867 }
2869 memset(membership,0,sizeof(memb_mask_t));
2871 set_facility();
2872 if (!debug)
2873 set_loglevel();
2875 clulog(LOG_DEBUG, "Service Manager starting\n");
2877 /*
2878 * daemon_init() blocks most signals, so we need to add the
2879 * ones the Service Manager is interested in.
2880 */
2881 sigemptyset(&set);
2882 sigaddset(&set, SIGINT);
2883 sigaddset(&set, SIGTERM);
2884 sigaddset(&set, SIGHUP);
2885 sigaddset(&set, SIGCHLD);
2886 sigprocmask(SIG_UNBLOCK, &set, NULL);
2887 (void) signal(SIGINT, (void (*)(int)) sigterm_handler);
2888 (void) signal(SIGTERM, (void (*)(int)) sigterm_handler);
2889 (void) signal(SIGHUP, (void (*)(int)) sighup_handler);
2890 (void) signal(SIGCHLD, (void (*)(int)) reap_zombies);
2892 /*
2893 * Retrieve our node id
2894 */
2895 myNodeID = memb_local_id();
2897 getNodeName(myNodeID, &myNodeName);
2898 myNodeName = strdup(myNodeName);
2899 myNodeState = NODE_DOWN;
2901 for (i = 0; i < MAX_SERVICES; i++) {
2902 ticks[i] = 0;
2903 svc_children[i].cs_pid = 0;
2904 svc_children[i].cs_rq = 0;
2905 }
2907 /*
2908 * Set up the message service
2909 */
2910 do {
2911 listen_fd = msg_listen(PROCID_CLUSVCMGRD);
2912 if (listen_fd >= 0)
2913 break;
2915 if (++retries < 30) {
2916 sleep(1); /* Arbitrary... */
2917 continue;
2918 }
2920 /* Could be that we lost and regained quorum really quickly */
2921 clulog(LOG_ERR, "Error setting up message listener: %s\n",
2922 strerror(errno));
2923 clulog(LOG_ERR, "%s process may already be running.\n",
2924 argv[0]);
2925 exit(1);
2926 } while (1);
2928 /*
2929 * Register for quorum events
2930 */
2931 do {
2932 quorum_fd = cm_ev_register(EC_QUORUM);
2933 if (quorum_fd >= 0)
2934 break;
2936 if (++retries < 10) {
2937 sleep(1);
2938 continue;
2939 }
2941 clulog(LOG_CRIT, "Couldn't register with the quorum daemon!");
2942 exit(1);
2943 } while(1);
2945 while (1) {
2947 gettimeofday(&tv1, NULL);
2948 #if 0
2949 /*
2950 * Reap any zombied service scripts, as we do not synchronously
2951 * wait on any of the service scripts. If the process was
2952 * handling a service action, clear out the indication that it
2953 * was running.
2954 */
2956 reap_zombies();
2957 #endif
2958 if (sighup_received) {
2959 sighup_received = 0;
2960 update_config();
2961 }
2963 if (sigterm_received)
2964 svcmgr_exit(0, 1);
2966 FD_ZERO(&rfds);
2967 FD_SET(listen_fd, &rfds);
2968 FD_SET(quorum_fd, &rfds);
2969 timeout.tv_sec = 2;
2970 timeout.tv_usec = 0;
2972 i = select(MAX(listen_fd,quorum_fd) + 1, &rfds, NULL, NULL,
2973 &timeout);
2975 /*
2976 * We used to not check the return from the select call.
2977 * However, this is necessary now because clusvcmgrd needs
2978 * to properly handle SIGHUP
2979 */
2980 if (i <= 0) {
2981 FD_ZERO(&rfds);
2982 if ((i == -1) && (errno != EINTR))
2983 clulog(LOG_WARNING, "select: %s\n",
2984 strerror(errno));
2985 }
2987 if (FD_ISSET(listen_fd, &rfds)) {
2988 fd = msg_accept_timeout(listen_fd, 1);
2989 /*
2990 * Process any waiting messages.
2991 */
2992 if (fd != -1) {
2993 dispatch_msg(fd);
2994 msg_close(fd);
2995 }
2996 }
2998 if (FD_ISSET(quorum_fd, &rfds)) {
2999 clulog(LOG_DEBUG, "Processing quorum event\n");
3000 if (quorum_msg(quorum_fd) == -1) {
3001 clulog(LOG_WARNING, "Invalid message from "
3002 "Quorum Daemon. Reconnecting\n");
3003 /* Failed to process it? Try reconnecting */
3004 cm_ev_unregister(quorum_fd);
3005 sleep(2);
3006 if (((quorum_fd =
3007 cm_ev_register(EC_QUORUM)) == -1) &&
3008 !sigterm_received) {
3010 clulog(LOG_EMERG, "Couldn't reconnect "
3011 "to the quorum daemon! "
3012 "REBOOTING");
3013 REBOOT(RB_AUTOBOOT);
3014 }
3015 }
3016 }
3018 gettimeofday(&tv2, NULL);
3019 elapsed_secs = tv2.tv_sec - tv1.tv_sec;
3021 /*
3022 * Check the status of running services and the cluster
3023 * configuration file (/etc/cluster.xml).
3024 */
3025 if ((check_period += elapsed_secs) >= CHECK_INTERVAL) {
3026 check_config_file();
3027 if (check_config_data() == 1) {
3028 rebuild_config_lockless();
3029 msg_svc_init(1);
3030 set_loglevel();
3031 notify_everybody();
3032 }
3033 check_services(check_period);
3034 check_period = 0;