dmake: do not set MAKEFLAGS=k
[unleashed/tickless.git] / usr / src / cmd / cmd-inet / usr.lib / ilbd / ilbd_hc.c
blob754484d83485595522b1938d29669cb9ade744c9
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright 2012 Milan Jurik. All rights reserved.
 */

28 #include <sys/types.h>
29 #include <sys/socket.h>
30 #include <sys/list.h>
31 #include <sys/stropts.h>
32 #include <sys/siginfo.h>
33 #include <sys/wait.h>
34 #include <arpa/inet.h>
35 #include <netinet/in.h>
36 #include <stdlib.h>
37 #include <stdio.h>
38 #include <strings.h>
39 #include <stddef.h>
40 #include <unistd.h>
41 #include <libilb.h>
42 #include <port.h>
43 #include <time.h>
44 #include <signal.h>
45 #include <assert.h>
46 #include <errno.h>
47 #include <spawn.h>
48 #include <fcntl.h>
49 #include <limits.h>
50 #include <poll.h>
51 #include "libilb_impl.h"
52 #include "ilbd.h"
54 /* Global list of HC objects */
55 list_t ilbd_hc_list;
57 /* Timer queue for all hc related timers. */
58 static iu_tq_t *ilbd_hc_timer_q;
60 /* Indicate whether the timer needs to be updated */
61 static boolean_t hc_timer_restarted;
63 static void ilbd_hc_probe_timer(iu_tq_t *, void *);
64 static ilb_status_t ilbd_hc_restart_timer(ilbd_hc_t *, ilbd_hc_srv_t *);
65 static boolean_t ilbd_run_probe(ilbd_hc_srv_t *);
/* Yields the larger of a and b; either argument may be evaluated twice. */
#define	MAX(a, b)	((b) < (a) ? (a) : (b))

/*
 * Number of arguments passed to a probe.  argv[0] is the path name of
 * the probe.
 */
#define	HC_PROBE_ARGC	8

/*
 * Max number of characters to be read from the output of a probe.  It
 * is long enough to read in a 64 bit integer.
 */
#define	HC_MAX_PROBE_OUTPUT	24

81 void
82 i_ilbd_setup_hc_list(void)
84 list_create(&ilbd_hc_list, sizeof (ilbd_hc_t),
85 offsetof(ilbd_hc_t, ihc_link));
89 * Given a hc object name, return a pointer to hc object if found.
91 ilbd_hc_t *
92 ilbd_get_hc(const char *name)
94 ilbd_hc_t *hc;
96 for (hc = list_head(&ilbd_hc_list); hc != NULL;
97 hc = list_next(&ilbd_hc_list, hc)) {
98 if (strcasecmp(hc->ihc_name, name) == 0)
99 return (hc);
101 return (NULL);
105 * Generates an audit record for create-healthcheck,
106 * delete-healtcheck subcommands.
108 static void
109 ilbd_audit_hc_event(const char *audit_hcname,
110 const ilb_hc_info_t *audit_hcinfo, ilbd_cmd_t cmd,
111 ilb_status_t rc, ucred_t *ucredp)
113 adt_session_data_t *ah;
114 adt_event_data_t *event;
115 au_event_t flag;
116 int audit_error;
118 if ((ucredp == NULL) && (cmd == ILBD_CREATE_HC)) {
120 * we came here from the path where ilbd incorporates
121 * the configuration that is listed in SCF:
122 * i_ilbd_read_config->ilbd_walk_hc_pgs->
123 * ->ilbd_scf_instance_walk_pg->ilbd_create_hc
124 * We skip auditing in that case
126 logdebug("ilbd_audit_hc_event: skipping auditing");
127 return;
130 if (adt_start_session(&ah, NULL, 0) != 0) {
131 logerr("ilbd_audit_hc_event: adt_start_session failed");
132 exit(EXIT_FAILURE);
134 if (adt_set_from_ucred(ah, ucredp, ADT_NEW) != 0) {
135 (void) adt_end_session(ah);
136 logerr("ilbd_audit_rule_event: adt_set_from_ucred failed");
137 exit(EXIT_FAILURE);
139 if (cmd == ILBD_CREATE_HC)
140 flag = ADT_ilb_create_healthcheck;
141 else if (cmd == ILBD_DESTROY_HC)
142 flag = ADT_ilb_delete_healthcheck;
144 if ((event = adt_alloc_event(ah, flag)) == NULL) {
145 logerr("ilbd_audit_hc_event: adt_alloc_event failed");
146 exit(EXIT_FAILURE);
148 (void) memset((char *)event, 0, sizeof (adt_event_data_t));
150 switch (cmd) {
151 case ILBD_CREATE_HC:
152 event->adt_ilb_create_healthcheck.auth_used =
153 NET_ILB_CONFIG_AUTH;
154 event->adt_ilb_create_healthcheck.hc_test =
155 (char *)audit_hcinfo->hci_test;
156 event->adt_ilb_create_healthcheck.hc_name =
157 (char *)audit_hcinfo->hci_name;
160 * If the value 0 is stored, the default values are
161 * set in the kernel. User land does not know about them
162 * So if the user does not specify them, audit record
163 * will show them as 0
165 event->adt_ilb_create_healthcheck.hc_timeout =
166 audit_hcinfo->hci_timeout;
167 event->adt_ilb_create_healthcheck.hc_count =
168 audit_hcinfo->hci_count;
169 event->adt_ilb_create_healthcheck.hc_interval =
170 audit_hcinfo->hci_interval;
171 break;
172 case ILBD_DESTROY_HC:
173 event->adt_ilb_delete_healthcheck.auth_used =
174 NET_ILB_CONFIG_AUTH;
175 event->adt_ilb_delete_healthcheck.hc_name =
176 (char *)audit_hcname;
177 break;
180 /* Fill in success/failure */
181 if (rc == ILB_STATUS_OK) {
182 if (adt_put_event(event, ADT_SUCCESS, ADT_SUCCESS) != 0) {
183 logerr("ilbd_audit_hc_event: adt_put_event failed");
184 exit(EXIT_FAILURE);
186 } else {
187 audit_error = ilberror2auditerror(rc);
188 if (adt_put_event(event, ADT_FAILURE, audit_error) != 0) {
189 logerr("ilbd_audit_hc_event: adt_put_event failed");
190 exit(EXIT_FAILURE);
193 adt_free_event(event);
194 (void) adt_end_session(ah);
198 * Given the ilb_hc_info_t passed in (from the libilb), create a hc object
199 * in ilbd. The parameter ev_port is not used, refer to comments of
200 * ilbd_create_sg() in ilbd_sg.c
202 /* ARGSUSED */
203 ilb_status_t
204 ilbd_create_hc(const ilb_hc_info_t *hc_info, int ev_port,
205 const struct passwd *ps, ucred_t *ucredp)
207 ilbd_hc_t *hc;
208 ilb_status_t ret = ILB_STATUS_OK;
211 * ps == NULL is from the daemon when it starts and load configuration
212 * ps != NULL is from client.
214 if (ps != NULL) {
215 ret = ilbd_check_client_config_auth(ps);
216 if (ret != ILB_STATUS_OK) {
217 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC,
218 ret, ucredp);
219 return (ret);
223 if (hc_info->hci_name[0] == '\0') {
224 logdebug("ilbd_create_hc: missing healthcheck info");
225 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC,
226 ILB_STATUS_ENOHCINFO, ucredp);
227 return (ILB_STATUS_ENOHCINFO);
230 hc = ilbd_get_hc(hc_info->hci_name);
231 if (hc != NULL) {
232 logdebug("ilbd_create_hc: healthcheck name %s already"
233 " exists", hc_info->hci_name);
234 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC,
235 ILB_STATUS_EEXIST, ucredp);
236 return (ILB_STATUS_EEXIST);
240 * Sanity check on user supplied probe. The given path name
241 * must be a full path name (starts with '/') and is
242 * executable.
244 if (strcasecmp(hc_info->hci_test, ILB_HC_STR_TCP) != 0 &&
245 strcasecmp(hc_info->hci_test, ILB_HC_STR_UDP) != 0 &&
246 strcasecmp(hc_info->hci_test, ILB_HC_STR_PING) != 0 &&
247 (hc_info->hci_test[0] != '/' ||
248 access(hc_info->hci_test, X_OK) == -1)) {
249 if (errno == ENOENT) {
250 logdebug("ilbd_create_hc: user script %s doesn't "
251 "exist", hc_info->hci_test);
252 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC,
253 ILB_STATUS_ENOENT, ucredp);
254 return (ILB_STATUS_ENOENT);
255 } else {
256 logdebug("ilbd_create_hc: user script %s is "
257 "invalid", hc_info->hci_test);
258 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC,
259 ILB_STATUS_EINVAL, ucredp);
260 return (ILB_STATUS_EINVAL);
264 /* Create and add the hc object */
265 hc = calloc(1, sizeof (ilbd_hc_t));
266 if (hc == NULL) {
267 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC,
268 ILB_STATUS_ENOMEM, ucredp);
269 return (ILB_STATUS_ENOMEM);
271 (void) memcpy(&hc->ihc_info, hc_info, sizeof (ilb_hc_info_t));
272 if (strcasecmp(hc->ihc_test, ILB_HC_STR_TCP) == 0)
273 hc->ihc_test_type = ILBD_HC_TCP;
274 else if (strcasecmp(hc->ihc_test, ILB_HC_STR_UDP) == 0)
275 hc->ihc_test_type = ILBD_HC_UDP;
276 else if (strcasecmp(hc->ihc_test, ILB_HC_STR_PING) == 0)
277 hc->ihc_test_type = ILBD_HC_PING;
278 else
279 hc->ihc_test_type = ILBD_HC_USER;
280 list_create(&hc->ihc_rules, sizeof (ilbd_hc_rule_t),
281 offsetof(ilbd_hc_rule_t, hcr_link));
283 /* Update SCF */
284 if (ps != NULL) {
285 if ((ret = ilbd_create_pg(ILBD_SCF_HC, (void *)hc)) !=
286 ILB_STATUS_OK) {
287 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC,
288 ret, ucredp);
289 list_destroy(&hc->ihc_rules);
290 free(hc);
291 return (ret);
295 /* Everything is fine, now add it to the global list. */
296 list_insert_tail(&ilbd_hc_list, hc);
297 ilbd_audit_hc_event(NULL, hc_info, ILBD_CREATE_HC, ret, ucredp);
298 return (ret);
302 * Given a name of a hc object, destroy it.
304 ilb_status_t
305 ilbd_destroy_hc(const char *hc_name, const struct passwd *ps,
306 ucred_t *ucredp)
308 ilb_status_t ret;
309 ilbd_hc_t *hc;
312 * No need to check ps == NULL, daemon won't call any destroy func
313 * at start up.
315 ret = ilbd_check_client_config_auth(ps);
316 if (ret != ILB_STATUS_OK) {
317 ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC,
318 ret, ucredp);
319 return (ret);
322 hc = ilbd_get_hc(hc_name);
323 if (hc == NULL) {
324 logdebug("ilbd_destroy_hc: healthcheck %s does not exist",
325 hc_name);
326 ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC,
327 ILB_STATUS_ENOENT, ucredp);
328 return (ILB_STATUS_ENOENT);
331 /* If hc is in use, cannot delete it */
332 if (hc->ihc_rule_cnt > 0) {
333 logdebug("ilbd_destroy_hc: healthcheck %s is associated"
334 " with a rule - cannot remove", hc_name);
335 ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC,
336 ILB_STATUS_INUSE, ucredp);
337 return (ILB_STATUS_INUSE);
340 if ((ret = ilbd_destroy_pg(ILBD_SCF_HC, hc_name)) !=
341 ILB_STATUS_OK) {
342 logdebug("ilbd_destroy_hc: cannot destroy healthcheck %s "
343 "property group", hc_name);
344 ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC,
345 ret, ucredp);
346 return (ret);
349 list_remove(&ilbd_hc_list, hc);
350 list_destroy(&hc->ihc_rules);
351 free(hc);
352 ilbd_audit_hc_event(hc_name, NULL, ILBD_DESTROY_HC, ret, ucredp);
353 return (ret);
357 * Given a hc object name, return its information. Used by libilb to
358 * get hc info.
360 ilb_status_t
361 ilbd_get_hc_info(const char *hc_name, uint32_t *rbuf, size_t *rbufsz)
363 ilbd_hc_t *hc;
364 ilb_hc_info_t *hc_info;
365 ilb_comm_t *ic = (ilb_comm_t *)rbuf;
367 hc = ilbd_get_hc(hc_name);
368 if (hc == NULL) {
369 logdebug("%s: healthcheck %s does not exist", __func__,
370 hc_name);
371 return (ILB_STATUS_ENOENT);
373 ilbd_reply_ok(rbuf, rbufsz);
374 hc_info = (ilb_hc_info_t *)&ic->ic_data;
376 (void) strlcpy(hc_info->hci_name, hc->ihc_name, sizeof (hc->ihc_name));
377 (void) strlcpy(hc_info->hci_test, hc->ihc_test, sizeof (hc->ihc_test));
378 hc_info->hci_timeout = hc->ihc_timeout;
379 hc_info->hci_count = hc->ihc_count;
380 hc_info->hci_interval = hc->ihc_interval;
381 hc_info->hci_def_ping = hc->ihc_def_ping;
383 *rbufsz += sizeof (ilb_hc_info_t);
385 return (ILB_STATUS_OK);
388 static void
389 ilbd_hc_copy_srvs(uint32_t *rbuf, size_t *rbufsz, ilbd_hc_rule_t *hc_rule,
390 const char *rulename)
392 ilbd_hc_srv_t *tmp_srv;
393 ilb_hc_srv_t *dst_srv;
394 ilb_hc_rule_srv_t *srvs;
395 size_t tmp_rbufsz;
396 int i;
398 tmp_rbufsz = *rbufsz;
399 /* Set up the reply buffer. rbufsz will be set to the new size. */
400 ilbd_reply_ok(rbuf, rbufsz);
402 /* Calculate how much space is left for holding server info. */
403 *rbufsz += sizeof (ilb_hc_rule_srv_t);
404 tmp_rbufsz -= *rbufsz;
406 srvs = (ilb_hc_rule_srv_t *)&((ilb_comm_t *)rbuf)->ic_data;
408 tmp_srv = list_head(&hc_rule->hcr_servers);
409 for (i = 0; tmp_srv != NULL && tmp_rbufsz >= sizeof (*dst_srv); i++) {
410 dst_srv = &srvs->rs_srvs[i];
412 (void) strlcpy(dst_srv->hcs_rule_name, rulename, ILB_NAMESZ);
413 (void) strlcpy(dst_srv->hcs_ID, tmp_srv->shc_sg_srv->sgs_srvID,
414 ILB_NAMESZ);
415 (void) strlcpy(dst_srv->hcs_hc_name,
416 tmp_srv->shc_hc->ihc_name, ILB_NAMESZ);
417 dst_srv->hcs_IP = tmp_srv->shc_sg_srv->sgs_addr;
418 dst_srv->hcs_fail_cnt = tmp_srv->shc_fail_cnt;
419 dst_srv->hcs_status = tmp_srv->shc_status;
420 dst_srv->hcs_rtt = tmp_srv->shc_rtt;
421 dst_srv->hcs_lasttime = tmp_srv->shc_lasttime;
422 dst_srv->hcs_nexttime = tmp_srv->shc_nexttime;
424 tmp_srv = list_next(&hc_rule->hcr_servers, tmp_srv);
425 tmp_rbufsz -= sizeof (*dst_srv);
427 srvs->rs_num_srvs = i;
428 *rbufsz += i * sizeof (*dst_srv);
432 * Given a rule name, return the hc status of its servers.
434 ilb_status_t
435 ilbd_get_hc_srvs(const char *rulename, uint32_t *rbuf, size_t *rbufsz)
437 ilbd_hc_t *hc;
438 ilbd_hc_rule_t *hc_rule;
440 for (hc = list_head(&ilbd_hc_list); hc != NULL;
441 hc = list_next(&ilbd_hc_list, hc)) {
442 for (hc_rule = list_head(&hc->ihc_rules); hc_rule != NULL;
443 hc_rule = list_next(&hc->ihc_rules, hc_rule)) {
444 if (strcasecmp(hc_rule->hcr_rule->irl_name,
445 rulename) != 0) {
446 continue;
448 ilbd_hc_copy_srvs(rbuf, rbufsz, hc_rule, rulename);
449 return (ILB_STATUS_OK);
452 return (ILB_STATUS_RULE_NO_HC);
456 * Initialize the hc timer and associate the notification of timeout to
457 * the given event port.
459 void
460 ilbd_hc_timer_init(int ev_port, ilbd_timer_event_obj_t *ev_obj)
462 struct sigevent sigev;
463 port_notify_t notify;
465 if ((ilbd_hc_timer_q = iu_tq_create()) == NULL) {
466 logerr("%s: cannot create hc timer queue", __func__);
467 exit(EXIT_FAILURE);
469 hc_timer_restarted = B_FALSE;
471 ev_obj->ev = ILBD_EVENT_TIMER;
472 ev_obj->timerid = -1;
474 notify.portnfy_port = ev_port;
475 notify.portnfy_user = ev_obj;
476 sigev.sigev_notify = SIGEV_PORT;
477 sigev.sigev_value.sival_ptr = &notify;
478 if (timer_create(CLOCK_REALTIME, &sigev, &ev_obj->timerid) == -1) {
479 logerr("%s: cannot create timer", __func__);
480 exit(EXIT_FAILURE);
485 * HC timeout handler.
487 void
488 ilbd_hc_timeout(void)
490 (void) iu_expire_timers(ilbd_hc_timer_q);
491 hc_timer_restarted = B_TRUE;
495 * Set up the timer to fire at the earliest timeout.
497 void
498 ilbd_hc_timer_update(ilbd_timer_event_obj_t *ev_obj)
500 itimerspec_t itimeout;
501 int timeout;
504 * There is no change on the timer list, so no need to set up the
505 * timer again.
507 if (!hc_timer_restarted)
508 return;
510 restart:
511 if ((timeout = iu_earliest_timer(ilbd_hc_timer_q)) == INFTIM) {
512 hc_timer_restarted = B_FALSE;
513 return;
514 } else if (timeout == 0) {
516 * Handle the timeout immediately. After that (clearing all
517 * the expired timers), check to see if there are still
518 * timers running. If yes, start them.
520 (void) iu_expire_timers(ilbd_hc_timer_q);
521 goto restart;
524 itimeout.it_value.tv_sec = timeout / MILLISEC + 1;
525 itimeout.it_value.tv_nsec = 0;
526 itimeout.it_interval.tv_sec = 0;
527 itimeout.it_interval.tv_nsec = 0;
530 * Failure to set a timeout is "OK" since hopefully there will be
531 * other events and timer_settime() will be called again. So
532 * we will only miss some timeouts. But in the worst case, no event
533 * will happen and ilbd will get stuck...
535 if (timer_settime(ev_obj->timerid, 0, &itimeout, NULL) == -1)
536 logerr("%s: cannot set timer", __func__);
537 hc_timer_restarted = B_FALSE;
541 * Kill the probe process of a server.
543 static void
544 ilbd_hc_kill_probe(ilbd_hc_srv_t *srv)
547 * First dissociate the fd from the event port. It should not
548 * fail.
550 if (port_dissociate(srv->shc_ev_port, PORT_SOURCE_FD,
551 srv->shc_child_fd) != 0) {
552 logdebug("%s: port_dissociate: %s", __func__, strerror(errno));
554 (void) close(srv->shc_child_fd);
555 free(srv->shc_ev);
556 srv->shc_ev = NULL;
558 /* Then kill the probe process. */
559 if (kill(srv->shc_child_pid, SIGKILL) != 0) {
560 logerr("%s: rule %s server %s: %s", __func__,
561 srv->shc_hc_rule->hcr_rule->irl_name,
562 srv->shc_sg_srv->sgs_srvID, strerror(errno));
564 /* Should not fail... */
565 if (waitpid(srv->shc_child_pid, NULL, 0) != srv->shc_child_pid) {
566 logdebug("%s: waitpid: rule %s server %s", __func__,
567 srv->shc_hc_rule->hcr_rule->irl_name,
568 srv->shc_sg_srv->sgs_srvID);
570 srv->shc_child_pid = 0;
574 * Disable the server, either because the server is dead or because a timer
575 * cannot be started for this server. Note that this only affects the
576 * transient configuration, meaning only in memory. The persistent
577 * configuration is not affected.
579 static void
580 ilbd_mark_server_disabled(ilbd_hc_srv_t *srv)
582 srv->shc_status = ILB_HCS_DISABLED;
584 /* Disable the server in kernel. */
585 if (ilbd_k_Xable_server(&srv->shc_sg_srv->sgs_addr,
586 srv->shc_hc_rule->hcr_rule->irl_name,
587 stat_declare_srv_dead) != ILB_STATUS_OK) {
588 logerr("%s: cannot disable server in kernel: rule %s "
589 "server %s", __func__,
590 srv->shc_hc_rule->hcr_rule->irl_name,
591 srv->shc_sg_srv->sgs_srvID);
596 * A probe fails, set the state of the server.
598 static void
599 ilbd_set_fail_state(ilbd_hc_srv_t *srv)
601 if (++srv->shc_fail_cnt < srv->shc_hc->ihc_count) {
602 /* Probe again */
603 ilbd_hc_probe_timer(ilbd_hc_timer_q, srv);
604 return;
607 logdebug("%s: rule %s server %s fails %u", __func__,
608 srv->shc_hc_rule->hcr_rule->irl_name, srv->shc_sg_srv->sgs_srvID,
609 srv->shc_fail_cnt);
612 * If this is a ping test, mark the server as
613 * unreachable instead of dead.
615 if (srv->shc_hc->ihc_test_type == ILBD_HC_PING ||
616 srv->shc_state == ilbd_hc_def_pinging) {
617 srv->shc_status = ILB_HCS_UNREACH;
618 } else {
619 srv->shc_status = ILB_HCS_DEAD;
622 /* Disable the server in kernel. */
623 if (ilbd_k_Xable_server(&srv->shc_sg_srv->sgs_addr,
624 srv->shc_hc_rule->hcr_rule->irl_name, stat_declare_srv_dead) !=
625 ILB_STATUS_OK) {
626 logerr("%s: cannot disable server in kernel: rule %s "
627 "server %s", __func__,
628 srv->shc_hc_rule->hcr_rule->irl_name,
629 srv->shc_sg_srv->sgs_srvID);
632 /* Still keep probing in case the server is alive again. */
633 if (ilbd_hc_restart_timer(srv->shc_hc, srv) != ILB_STATUS_OK) {
634 /* Only thing to do is to disable the server... */
635 logerr("%s: cannot restart timer: rule %s server %s", __func__,
636 srv->shc_hc_rule->hcr_rule->irl_name,
637 srv->shc_sg_srv->sgs_srvID);
638 srv->shc_status = ILB_HCS_DISABLED;
643 * A probe process has not returned for the ihc_timeout period, we should
644 * kill it. This function is the handler of this.
646 /* ARGSUSED */
647 static void
648 ilbd_hc_kill_timer(iu_tq_t *tq, void *arg)
650 ilbd_hc_srv_t *srv = (ilbd_hc_srv_t *)arg;
652 ilbd_hc_kill_probe(srv);
653 ilbd_set_fail_state(srv);
657 * Probe timeout handler. Send out the appropriate probe.
659 /* ARGSUSED */
660 static void
661 ilbd_hc_probe_timer(iu_tq_t *tq, void *arg)
663 ilbd_hc_srv_t *srv = (ilbd_hc_srv_t *)arg;
666 * If starting the probe fails, just pretend that the timeout has
667 * extended.
669 if (!ilbd_run_probe(srv)) {
671 * If we cannot restart the timer, the only thing we can do
672 * is to disable this server. Hopefully the sys admin will
673 * notice this and enable this server again later.
675 if (ilbd_hc_restart_timer(srv->shc_hc, srv) != ILB_STATUS_OK) {
676 logerr("%s: cannot restart timer: rule %s server %s, "
677 "disabling it", __func__,
678 srv->shc_hc_rule->hcr_rule->irl_name,
679 srv->shc_sg_srv->sgs_srvID);
680 ilbd_mark_server_disabled(srv);
682 return;
686 * Similar to above, if kill timer cannot be started, disable the
687 * server.
689 if ((srv->shc_tid = iu_schedule_timer(ilbd_hc_timer_q,
690 srv->shc_hc->ihc_timeout, ilbd_hc_kill_timer, srv)) == -1) {
691 logerr("%s: cannot start kill timer: rule %s server %s, "
692 "disabling it", __func__,
693 srv->shc_hc_rule->hcr_rule->irl_name,
694 srv->shc_sg_srv->sgs_srvID);
695 ilbd_mark_server_disabled(srv);
697 hc_timer_restarted = B_TRUE;
700 /* Restart the periodic timer for a given server. */
701 static ilb_status_t
702 ilbd_hc_restart_timer(ilbd_hc_t *hc, ilbd_hc_srv_t *srv)
704 int timeout;
706 /* Don't allow the timeout interval to be less than 1s */
707 timeout = MAX((hc->ihc_interval >> 1) + (gethrtime() %
708 (hc->ihc_interval + 1)), 1);
711 * If the probe is actually a ping probe, there is no need to
712 * do default pinging. Just skip the step.
714 if (hc->ihc_def_ping && hc->ihc_test_type != ILBD_HC_PING)
715 srv->shc_state = ilbd_hc_def_pinging;
716 else
717 srv->shc_state = ilbd_hc_probing;
718 srv->shc_tid = iu_schedule_timer(ilbd_hc_timer_q, timeout,
719 ilbd_hc_probe_timer, srv);
721 if (srv->shc_tid == -1)
722 return (ILB_STATUS_TIMER);
723 srv->shc_lasttime = time(NULL);
724 srv->shc_nexttime = time(NULL) + timeout;
726 hc_timer_restarted = B_TRUE;
727 return (ILB_STATUS_OK);
730 /* Helper routine to associate a server with its hc object. */
731 static ilb_status_t
732 ilbd_hc_srv_add(ilbd_hc_t *hc, ilbd_hc_rule_t *hc_rule,
733 const ilb_sg_srv_t *srv, int ev_port)
735 ilbd_hc_srv_t *new_srv;
736 ilb_status_t ret;
738 if ((new_srv = calloc(1, sizeof (ilbd_hc_srv_t))) == NULL)
739 return (ILB_STATUS_ENOMEM);
740 new_srv->shc_hc = hc;
741 new_srv->shc_hc_rule = hc_rule;
742 new_srv->shc_sg_srv = srv;
743 new_srv->shc_ev_port = ev_port;
744 new_srv->shc_tid = -1;
745 new_srv->shc_nexttime = time(NULL);
746 new_srv->shc_lasttime = new_srv->shc_nexttime;
748 if ((hc_rule->hcr_rule->irl_flags & ILB_FLAGS_RULE_ENABLED) &&
749 ILB_IS_SRV_ENABLED(srv->sgs_flags)) {
750 new_srv->shc_status = ILB_HCS_UNINIT;
751 ret = ilbd_hc_restart_timer(hc, new_srv);
752 if (ret != ILB_STATUS_OK) {
753 free(new_srv);
754 return (ret);
756 } else {
757 new_srv->shc_status = ILB_HCS_DISABLED;
760 list_insert_tail(&hc_rule->hcr_servers, new_srv);
761 return (ILB_STATUS_OK);
/*
 * Handy macro to cancel a server's timer.  If the server has a timer
 * pending (shc_tid != -1) it is removed from the hc timer queue and
 * shc_tid is reset to -1.  The queue is always flagged as changed.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement, including in an unbraced if/else.  The locals carry
 * a prefix so that they cannot shadow a caller's "ret" or "arg".
 */
#define	HC_CANCEL_TIMER(srv)						\
do {									\
	void *hct_arg;							\
	int hct_ret;							\
	if ((srv)->shc_tid != -1) {					\
		hct_ret = iu_cancel_timer(ilbd_hc_timer_q,		\
		    (srv)->shc_tid, &hct_arg);				\
		(srv)->shc_tid = -1;					\
		assert(hct_ret == 1);					\
		assert(hct_arg == (srv));				\
	}								\
	hc_timer_restarted = B_TRUE;					\
} while (0)

778 /* Helper routine to dissociate a server from its hc object. */
779 static ilb_status_t
780 ilbd_hc_srv_rem(ilbd_hc_rule_t *hc_rule, const ilb_sg_srv_t *srv)
782 ilbd_hc_srv_t *tmp_srv;
784 for (tmp_srv = list_head(&hc_rule->hcr_servers); tmp_srv != NULL;
785 tmp_srv = list_next(&hc_rule->hcr_servers, tmp_srv)) {
786 if (tmp_srv->shc_sg_srv == srv) {
787 list_remove(&hc_rule->hcr_servers, tmp_srv);
788 HC_CANCEL_TIMER(tmp_srv);
789 if (tmp_srv->shc_child_pid != 0)
790 ilbd_hc_kill_probe(tmp_srv);
791 free(tmp_srv);
792 return (ILB_STATUS_OK);
795 return (ILB_STATUS_ENOENT);
798 /* Helper routine to dissociate all servers of a rule from its hc object. */
799 static void
800 ilbd_hc_srv_rem_all(ilbd_hc_rule_t *hc_rule)
802 ilbd_hc_srv_t *srv;
804 while ((srv = list_remove_head(&hc_rule->hcr_servers)) != NULL) {
805 HC_CANCEL_TIMER(srv);
806 if (srv->shc_child_pid != 0)
807 ilbd_hc_kill_probe(srv);
808 free(srv);
812 /* Associate a rule with its hc object. */
813 ilb_status_t
814 ilbd_hc_associate_rule(const ilbd_rule_t *rule, int ev_port)
816 ilbd_hc_t *hc;
817 ilbd_hc_rule_t *hc_rule;
818 ilb_status_t ret;
819 ilbd_sg_t *sg;
820 ilbd_srv_t *ilbd_srv;
822 /* The rule is assumed to be initialized appropriately. */
823 if ((hc = ilbd_get_hc(rule->irl_hcname)) == NULL) {
824 logdebug("ilbd_hc_associate_rule: healthcheck %s does not "
825 "exist", rule->irl_hcname);
826 return (ILB_STATUS_ENOHCINFO);
828 if ((hc->ihc_test_type == ILBD_HC_TCP &&
829 rule->irl_proto != IPPROTO_TCP) ||
830 (hc->ihc_test_type == ILBD_HC_UDP &&
831 rule->irl_proto != IPPROTO_UDP)) {
832 return (ILB_STATUS_RULE_HC_MISMATCH);
834 if ((hc_rule = calloc(1, sizeof (ilbd_hc_rule_t))) == NULL) {
835 logdebug("ilbd_hc_associate_rule: out of memory");
836 return (ILB_STATUS_ENOMEM);
839 hc_rule->hcr_rule = rule;
840 list_create(&hc_rule->hcr_servers, sizeof (ilbd_hc_srv_t),
841 offsetof(ilbd_hc_srv_t, shc_srv_link));
843 /* Add all the servers. */
844 sg = rule->irl_sg;
845 for (ilbd_srv = list_head(&sg->isg_srvlist); ilbd_srv != NULL;
846 ilbd_srv = list_next(&sg->isg_srvlist, ilbd_srv)) {
847 if ((ret = ilbd_hc_srv_add(hc, hc_rule, &ilbd_srv->isv_srv,
848 ev_port)) != ILB_STATUS_OK) {
849 /* Remove all previously added servers */
850 ilbd_hc_srv_rem_all(hc_rule);
851 list_destroy(&hc_rule->hcr_servers);
852 free(hc_rule);
853 return (ret);
856 list_insert_tail(&hc->ihc_rules, hc_rule);
857 hc->ihc_rule_cnt++;
859 return (ILB_STATUS_OK);
862 /* Dissociate a rule from its hc object. */
863 ilb_status_t
864 ilbd_hc_dissociate_rule(const ilbd_rule_t *rule)
866 ilbd_hc_t *hc;
867 ilbd_hc_rule_t *hc_rule;
869 /* The rule is assumed to be initialized appropriately. */
870 if ((hc = ilbd_get_hc(rule->irl_hcname)) == NULL) {
871 logdebug("ilbd_hc_dissociate_rule: healthcheck %s does not "
872 "exist", rule->irl_hcname);
873 return (ILB_STATUS_ENOENT);
875 for (hc_rule = list_head(&hc->ihc_rules); hc_rule != NULL;
876 hc_rule = list_next(&hc->ihc_rules, hc_rule)) {
877 if (hc_rule->hcr_rule == rule)
878 break;
880 if (hc_rule == NULL) {
881 logdebug("ilbd_hc_dissociate_rule: rule %s is not associated "
882 "with healtcheck %s", rule->irl_hcname, hc->ihc_name);
883 return (ILB_STATUS_ENOENT);
885 ilbd_hc_srv_rem_all(hc_rule);
886 list_remove(&hc->ihc_rules, hc_rule);
887 hc->ihc_rule_cnt--;
888 list_destroy(&hc_rule->hcr_servers);
889 free(hc_rule);
890 return (ILB_STATUS_OK);
894 * Given a hc object name and a rule, check to see if the rule is associated
895 * with the hc object. If it is, the hc object is returned in **hc and the
896 * ilbd_hc_rule_t is returned in **hc_rule.
898 static boolean_t
899 ilbd_hc_check_rule(const char *hc_name, const ilbd_rule_t *rule,
900 ilbd_hc_t **hc, ilbd_hc_rule_t **hc_rule)
902 ilbd_hc_t *tmp_hc;
903 ilbd_hc_rule_t *tmp_hc_rule;
905 if ((tmp_hc = ilbd_get_hc(hc_name)) == NULL)
906 return (B_FALSE);
907 for (tmp_hc_rule = list_head(&tmp_hc->ihc_rules); tmp_hc_rule != NULL;
908 tmp_hc_rule = list_next(&tmp_hc->ihc_rules, tmp_hc_rule)) {
909 if (tmp_hc_rule->hcr_rule == rule) {
910 *hc = tmp_hc;
911 *hc_rule = tmp_hc_rule;
912 return (B_TRUE);
915 return (B_FALSE);
918 /* Associate a server with its hc object. */
919 ilb_status_t
920 ilbd_hc_add_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv,
921 int ev_port)
923 ilbd_hc_t *hc;
924 ilbd_hc_rule_t *hc_rule;
926 if (!ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule))
927 return (ILB_STATUS_ENOENT);
928 return (ilbd_hc_srv_add(hc, hc_rule, srv, ev_port));
931 /* Dissociate a server from its hc object. */
932 ilb_status_t
933 ilbd_hc_del_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv)
935 ilbd_hc_t *hc;
936 ilbd_hc_rule_t *hc_rule;
938 if (!ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule))
939 return (ILB_STATUS_ENOENT);
940 return (ilbd_hc_srv_rem(hc_rule, srv));
943 /* Helper routine to enable/disable a server's hc probe. */
944 static ilb_status_t
945 ilbd_hc_toggle_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv,
946 boolean_t enable)
948 ilbd_hc_t *hc;
949 ilbd_hc_rule_t *hc_rule;
950 ilbd_hc_srv_t *tmp_srv;
951 ilb_status_t ret;
953 if (!ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule))
954 return (ILB_STATUS_ENOENT);
955 for (tmp_srv = list_head(&hc_rule->hcr_servers); tmp_srv != NULL;
956 tmp_srv = list_next(&hc_rule->hcr_servers, tmp_srv)) {
957 if (tmp_srv->shc_sg_srv != srv) {
958 continue;
960 if (enable) {
961 if (tmp_srv->shc_status == ILB_HCS_DISABLED) {
962 ret = ilbd_hc_restart_timer(hc, tmp_srv);
963 if (ret != ILB_STATUS_OK) {
964 logerr("%s: cannot start timers for "
965 "rule %s server %s", __func__,
966 rule->irl_name,
967 tmp_srv->shc_sg_srv->sgs_srvID);
968 return (ret);
970 /* Start from fresh... */
971 tmp_srv->shc_status = ILB_HCS_UNINIT;
972 tmp_srv->shc_rtt = 0;
973 tmp_srv->shc_fail_cnt = 0;
975 } else {
976 if (tmp_srv->shc_status != ILB_HCS_DISABLED) {
977 tmp_srv->shc_status = ILB_HCS_DISABLED;
978 HC_CANCEL_TIMER(tmp_srv);
979 if (tmp_srv->shc_child_pid != 0)
980 ilbd_hc_kill_probe(tmp_srv);
983 return (ILB_STATUS_OK);
985 return (ILB_STATUS_ENOENT);
988 ilb_status_t
989 ilbd_hc_enable_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv)
991 return (ilbd_hc_toggle_server(rule, srv, B_TRUE));
994 ilb_status_t
995 ilbd_hc_disable_server(const ilbd_rule_t *rule, const ilb_sg_srv_t *srv)
997 return (ilbd_hc_toggle_server(rule, srv, B_FALSE));
1001 * Helper routine to enable/disable a rule's hc probe (including all its
1002 * servers).
1004 static ilb_status_t
1005 ilbd_hc_toggle_rule(const ilbd_rule_t *rule, boolean_t enable)
1007 ilbd_hc_t *hc;
1008 ilbd_hc_rule_t *hc_rule;
1009 ilbd_hc_srv_t *tmp_srv;
1010 int ret;
1012 if (!ilbd_hc_check_rule(rule->irl_hcname, rule, &hc, &hc_rule))
1013 return (ILB_STATUS_ENOENT);
1015 for (tmp_srv = list_head(&hc_rule->hcr_servers); tmp_srv != NULL;
1016 tmp_srv = list_next(&hc_rule->hcr_servers, tmp_srv)) {
1017 if (enable) {
1019 * If the server is disabled in the rule, do not
1020 * restart its timer.
1022 if (tmp_srv->shc_status == ILB_HCS_DISABLED &&
1023 ILB_IS_SRV_ENABLED(
1024 tmp_srv->shc_sg_srv->sgs_flags)) {
1025 ret = ilbd_hc_restart_timer(hc, tmp_srv);
1026 if (ret != ILB_STATUS_OK) {
1027 logerr("%s: cannot start timers for "
1028 "rule %s server %s", __func__,
1029 rule->irl_name,
1030 tmp_srv->shc_sg_srv->sgs_srvID);
1031 goto rollback;
1032 } else {
1033 /* Start from fresh... */
1034 tmp_srv->shc_status = ILB_HCS_UNINIT;
1035 tmp_srv->shc_rtt = 0;
1036 tmp_srv->shc_fail_cnt = 0;
1039 } else {
1040 if (tmp_srv->shc_status != ILB_HCS_DISABLED) {
1041 HC_CANCEL_TIMER(tmp_srv);
1042 tmp_srv->shc_status = ILB_HCS_DISABLED;
1043 if (tmp_srv->shc_child_pid != 0)
1044 ilbd_hc_kill_probe(tmp_srv);
1048 return (ILB_STATUS_OK);
1049 rollback:
1050 enable = !enable;
1051 for (tmp_srv = list_prev(&hc_rule->hcr_servers, tmp_srv);
1052 tmp_srv != NULL;
1053 tmp_srv = list_prev(&hc_rule->hcr_servers, tmp_srv)) {
1054 if (enable) {
1055 if (tmp_srv->shc_status == ILB_HCS_DISABLED &&
1056 ILB_IS_SRV_ENABLED(
1057 tmp_srv->shc_sg_srv->sgs_flags)) {
1058 (void) ilbd_hc_restart_timer(hc, tmp_srv);
1059 tmp_srv->shc_status = ILB_HCS_UNINIT;
1060 tmp_srv->shc_rtt = 0;
1061 tmp_srv->shc_fail_cnt = 0;
1063 } else {
1064 if (tmp_srv->shc_status != ILB_HCS_DISABLED) {
1065 HC_CANCEL_TIMER(tmp_srv);
1066 tmp_srv->shc_status = ILB_HCS_DISABLED;
1067 if (tmp_srv->shc_child_pid != 0)
1068 ilbd_hc_kill_probe(tmp_srv);
1072 return (ret);
1075 ilb_status_t
1076 ilbd_hc_enable_rule(const ilbd_rule_t *rule)
1078 return (ilbd_hc_toggle_rule(rule, B_TRUE));
1081 ilb_status_t
1082 ilbd_hc_disable_rule(const ilbd_rule_t *rule)
1084 return (ilbd_hc_toggle_rule(rule, B_FALSE));
1087 static const char *
1088 topo_2_str(ilb_topo_t topo)
1090 switch (topo) {
1091 case ILB_TOPO_DSR:
1092 return ("DSR");
1093 case ILB_TOPO_NAT:
1094 return ("NAT");
1095 case ILB_TOPO_HALF_NAT:
1096 return ("HALF_NAT");
1097 default:
1098 /* Should not happen. */
1099 logerr("%s: unknown topology", __func__);
1100 break;
1102 return ("");
1106 * Create the argument list to be passed to a hc probe command.
1107 * The passed in argv is assumed to have HC_PROBE_ARGC elements.
1109 static boolean_t
1110 create_argv(ilbd_hc_srv_t *srv, char *argv[])
1112 char buf[INET6_ADDRSTRLEN];
1113 ilbd_rule_t const *rule;
1114 ilb_sg_srv_t const *sg_srv;
1115 struct in_addr v4_addr;
1116 in_port_t port;
1117 int i;
1119 rule = srv->shc_hc_rule->hcr_rule;
1120 sg_srv = srv->shc_sg_srv;
1122 if (srv->shc_state == ilbd_hc_def_pinging) {
1123 if ((argv[0] = strdup(ILB_PROBE_PING)) == NULL)
1124 return (B_FALSE);
1125 } else {
1126 switch (srv->shc_hc->ihc_test_type) {
1127 case ILBD_HC_USER:
1128 if ((argv[0] = strdup(srv->shc_hc->ihc_test)) == NULL)
1129 return (B_FALSE);
1130 break;
1131 case ILBD_HC_TCP:
1132 case ILBD_HC_UDP:
1133 if ((argv[0] = strdup(ILB_PROBE_PROTO)) ==
1134 NULL) {
1135 return (B_FALSE);
1137 break;
1138 case ILBD_HC_PING:
1139 if ((argv[0] = strdup(ILB_PROBE_PING)) == NULL) {
1140 return (B_FALSE);
1142 break;
1147 * argv[1] is the VIP.
1149 * Right now, the VIP and the backend server addresses should be
1150 * in the same IP address family. Here we don't do that in case
1151 * this assumption is changed in future.
1153 if (IN6_IS_ADDR_V4MAPPED(&rule->irl_vip)) {
1154 IN6_V4MAPPED_TO_INADDR(&rule->irl_vip, &v4_addr);
1155 if (inet_ntop(AF_INET, &v4_addr, buf, sizeof (buf)) == NULL)
1156 goto cleanup;
1157 } else {
1158 if (inet_ntop(AF_INET6, &rule->irl_vip, buf,
1159 sizeof (buf)) == NULL) {
1160 goto cleanup;
1163 if ((argv[1] = strdup(buf)) == NULL)
1164 goto cleanup;
1167 * argv[2] is the backend server address.
1169 if (IN6_IS_ADDR_V4MAPPED(&sg_srv->sgs_addr)) {
1170 IN6_V4MAPPED_TO_INADDR(&sg_srv->sgs_addr, &v4_addr);
1171 if (inet_ntop(AF_INET, &v4_addr, buf, sizeof (buf)) == NULL)
1172 goto cleanup;
1173 } else {
1174 if (inet_ntop(AF_INET6, &sg_srv->sgs_addr, buf,
1175 sizeof (buf)) == NULL) {
1176 goto cleanup;
1179 if ((argv[2] = strdup(buf)) == NULL)
1180 goto cleanup;
1183 * argv[3] is the transport protocol used in the rule.
1185 switch (rule->irl_proto) {
1186 case IPPROTO_TCP:
1187 argv[3] = strdup("TCP");
1188 break;
1189 case IPPROTO_UDP:
1190 argv[3] = strdup("UDP");
1191 break;
1192 default:
1193 logerr("%s: unknown protocol", __func__);
1194 goto cleanup;
1196 if (argv[3] == NULL)
1197 goto cleanup;
1200 * argv[4] is the load balance mode, DSR, NAT, HALF-NAT.
1202 if ((argv[4] = strdup(topo_2_str(rule->irl_topo))) == NULL)
1203 goto cleanup;
1206 * argv[5] is the port range. Right now, there should only be 1 port.
1208 switch (rule->irl_hcpflag) {
1209 case ILB_HCI_PROBE_FIX:
1210 port = ntohs(rule->irl_hcport);
1211 break;
1212 case ILB_HCI_PROBE_ANY: {
1213 in_port_t min, max;
1215 if (ntohs(sg_srv->sgs_minport) == 0) {
1216 min = ntohs(rule->irl_minport);
1217 max = ntohs(rule->irl_maxport);
1218 } else {
1219 min = ntohs(sg_srv->sgs_minport);
1220 max = ntohs(sg_srv->sgs_maxport);
1222 if (max > min)
1223 port = min + gethrtime() % (max - min + 1);
1224 else
1225 port = min;
1226 break;
1228 default:
1229 logerr("%s: unknown HC flag", __func__);
1230 goto cleanup;
1232 (void) sprintf(buf, "%d", port);
1233 if ((argv[5] = strdup(buf)) == NULL)
1234 goto cleanup;
1237 * argv[6] is the probe timeout.
1239 (void) sprintf(buf, "%d", srv->shc_hc->ihc_timeout);
1240 if ((argv[6] = strdup(buf)) == NULL)
1241 goto cleanup;
1243 argv[7] = NULL;
1244 return (B_TRUE);
1246 cleanup:
1247 for (i = 0; i < HC_PROBE_ARGC; i++) {
1248 free(argv[i]);
1250 return (B_FALSE);
/*
 * Free every string of a NULL terminated argument vector.  The vector
 * itself is not freed and its slots are left untouched.
 */
static void
destroy_argv(char *argv[])
{
	char **argp;

	for (argp = argv; *argp != NULL; argp++)
		free(*argp);
}
1262 /* Spawn a process to run the hc probe on the given server. */
1263 static boolean_t
1264 ilbd_run_probe(ilbd_hc_srv_t *srv)
1266 posix_spawn_file_actions_t fd_actions;
1267 boolean_t init_fd_actions = B_FALSE;
1268 posix_spawnattr_t attr;
1269 boolean_t init_attr = B_FALSE;
1270 sigset_t child_sigset;
1271 int fds[2];
1272 int fdflags;
1273 pid_t pid;
1274 char *child_argv[HC_PROBE_ARGC];
1275 ilbd_hc_probe_event_t *probe_ev;
1276 char *probe_name;
1278 bzero(child_argv, HC_PROBE_ARGC * sizeof (char *));
1279 if ((probe_ev = calloc(1, sizeof (*probe_ev))) == NULL) {
1280 logdebug("ilbd_run_probe: calloc");
1281 return (B_FALSE);
1284 /* Set up a pipe to get output from probe command. */
1285 if (pipe(fds) < 0) {
1286 logdebug("ilbd_run_probe: cannot create pipe");
1287 free(probe_ev);
1288 return (B_FALSE);
1290 /* Set our side of the pipe to be non-blocking */
1291 if ((fdflags = fcntl(fds[0], F_GETFL, 0)) == -1) {
1292 logdebug("ilbd_run_probe: fcntl(F_GETFL)");
1293 goto cleanup;
1295 if (fcntl(fds[0], F_SETFL, fdflags | O_NONBLOCK) == -1) {
1296 logdebug("ilbd_run_probe: fcntl(F_SETFL)");
1297 goto cleanup;
1300 if (posix_spawn_file_actions_init(&fd_actions) != 0) {
1301 logdebug("ilbd_run_probe: posix_spawn_file_actions_init");
1302 goto cleanup;
1304 init_fd_actions = B_TRUE;
1305 if (posix_spawnattr_init(&attr) != 0) {
1306 logdebug("ilbd_run_probe: posix_spawnattr_init");
1307 goto cleanup;
1309 init_attr = B_TRUE;
1310 if (posix_spawn_file_actions_addclose(&fd_actions, fds[0]) != 0) {
1311 logdebug("ilbd_run_probe: posix_spawn_file_actions_addclose");
1312 goto cleanup;
1314 if (posix_spawn_file_actions_adddup2(&fd_actions, fds[1],
1315 STDOUT_FILENO) != 0) {
1316 logdebug("ilbd_run_probe: posix_spawn_file_actions_dup2");
1317 goto cleanup;
1319 if (posix_spawn_file_actions_addclose(&fd_actions, fds[1]) != 0) {
1320 logdebug("ilbd_run_probe: posix_spawn_file_actions_addclose");
1321 goto cleanup;
1324 /* Reset all signal handling of the child to default. */
1325 (void) sigfillset(&child_sigset);
1326 if (posix_spawnattr_setsigdefault(&attr, &child_sigset) != 0) {
1327 logdebug("ilbd_run_probe: posix_spawnattr_setsigdefault");
1328 goto cleanup;
1330 /* Don't want SIGCHLD. */
1331 if (posix_spawnattr_setflags(&attr, POSIX_SPAWN_NOSIGCHLD_NP|
1332 POSIX_SPAWN_SETSIGDEF) != 0) {
1333 logdebug("ilbd_run_probe: posix_spawnattr_setflags");
1334 goto cleanup;
1337 if (!create_argv(srv, child_argv)) {
1338 logdebug("ilbd_run_probe: create_argv");
1339 goto cleanup;
1343 * If we are doing default pinging or not using a user supplied
1344 * probe, we should execute our standard supplied probe. The
1345 * supplied probe command handles all types of probes. And the
1346 * type used depends on argv[0], as filled in by create_argv().
1348 if (srv->shc_state == ilbd_hc_def_pinging ||
1349 srv->shc_hc->ihc_test_type != ILBD_HC_USER) {
1350 probe_name = ILB_PROBE_PROTO;
1351 } else {
1352 probe_name = srv->shc_hc->ihc_test;
1354 if (posix_spawn(&pid, probe_name, &fd_actions, &attr, child_argv,
1355 NULL) != 0) {
1356 logerr("%s: posix_spawn: %s for server %s: %s", __func__,
1357 srv->shc_hc->ihc_test, srv->shc_sg_srv->sgs_srvID,
1358 strerror(errno));
1359 goto cleanup;
1362 (void) close(fds[1]);
1363 srv->shc_child_pid = pid;
1364 srv->shc_child_fd = fds[0];
1365 srv->shc_ev = probe_ev;
1367 probe_ev->ihp_ev = ILBD_EVENT_PROBE;
1368 probe_ev->ihp_srv = srv;
1369 probe_ev->ihp_pid = pid;
1370 if (port_associate(srv->shc_ev_port, PORT_SOURCE_FD, fds[0],
1371 POLLRDNORM, probe_ev) != 0) {
1373 * Need to kill the child. It will free the srv->shc_ev,
1374 * which is probe_ev. So set probe_ev to NULL.
1376 ilbd_hc_kill_probe(srv);
1377 probe_ev = NULL;
1378 goto cleanup;
1381 destroy_argv(child_argv);
1382 (void) posix_spawn_file_actions_destroy(&fd_actions);
1383 (void) posix_spawnattr_destroy(&attr);
1384 return (B_TRUE);
1386 cleanup:
1387 destroy_argv(child_argv);
1388 if (init_fd_actions == B_TRUE)
1389 (void) posix_spawn_file_actions_destroy(&fd_actions);
1390 if (init_attr == B_TRUE)
1391 (void) posix_spawnattr_destroy(&attr);
1392 (void) close(fds[0]);
1393 (void) close(fds[1]);
1394 free(probe_ev);
1395 return (B_FALSE);
1399 * Called by ild_hc_probe_return() to re-associate the fd to a child to
1400 * the event port.
1402 static void
1403 reassociate_port(int ev_port, int fd, ilbd_hc_probe_event_t *ev)
1405 if (port_associate(ev_port, PORT_SOURCE_FD, fd,
1406 POLLRDNORM, ev) != 0) {
1408 * If we cannot reassociate with the port, the only
1409 * thing we can do now is to kill the child and
1410 * do a blocking wait here...
1412 logdebug("%s: port_associate: %s", __func__, strerror(errno));
1413 if (kill(ev->ihp_pid, SIGKILL) != 0)
1414 logerr("%s: kill: %s", __func__, strerror(errno));
1415 if (waitpid(ev->ihp_pid, NULL, 0) != ev->ihp_pid)
1416 logdebug("%s: waitpid: %s", __func__, strerror(errno));
1417 free(ev);
1422 * To handle a child probe process hanging up.
1424 static void
1425 ilbd_hc_child_hup(int ev_port, int fd, ilbd_hc_probe_event_t *ev)
1427 ilbd_hc_srv_t *srv;
1428 pid_t ret_pid;
1429 int ret;
1431 srv = ev->ihp_srv;
1433 if (!ev->ihp_done) {
1434 /* ilbd does not care about this process anymore ... */
1435 ev->ihp_done = B_TRUE;
1436 srv->shc_ev = NULL;
1437 srv->shc_child_pid = 0;
1438 HC_CANCEL_TIMER(srv);
1439 ilbd_set_fail_state(srv);
1441 ret_pid = waitpid(ev->ihp_pid, &ret, WNOHANG);
1442 switch (ret_pid) {
1443 case -1:
1444 logperror("ilbd_hc_child_hup: waitpid");
1445 /* FALLTHROUGH */
1446 case 0:
1447 /* The child has not completed the exit. Wait again. */
1448 reassociate_port(ev_port, fd, ev);
1449 break;
1450 default:
1451 /* Right now, we just ignore the exit status. */
1452 if (WIFEXITED(ret))
1453 ret = WEXITSTATUS(ret);
1454 (void) close(fd);
1455 free(ev);
1460 * To read the output of a child probe process.
1462 static void
1463 ilbd_hc_child_data(int fd, ilbd_hc_probe_event_t *ev)
1465 ilbd_hc_srv_t *srv;
1466 char buf[HC_MAX_PROBE_OUTPUT];
1467 int ret;
1468 int64_t rtt;
1470 srv = ev->ihp_srv;
1472 bzero(buf, HC_MAX_PROBE_OUTPUT);
1473 ret = read(fd, buf, HC_MAX_PROBE_OUTPUT - 1);
1474 /* Should not happen since event port should have caught this. */
1475 assert(ret > 0);
1478 * We expect the probe command to print out the RTT only. But
1479 * the command may misbehave and print out more than what we intend to
1480 * read in. So need to do this check below to "flush" out all the
1481 * output from the command.
1483 if (!ev->ihp_done) {
1484 ev->ihp_done = B_TRUE;
1485 /* We don't need to know about this event anymore. */
1486 srv->shc_ev = NULL;
1487 srv->shc_child_pid = 0;
1488 HC_CANCEL_TIMER(srv);
1489 } else {
1490 return;
1493 rtt = strtoll(buf, NULL, 10);
1496 * -1 means the server is dead or the probe somehow fails. Treat
1497 * them both as server is dead.
1499 if (rtt == -1) {
1500 ilbd_set_fail_state(srv);
1501 return;
1502 } else if (rtt > 0) {
1503 /* If the returned RTT value is not valid, just ignore it. */
1504 if (rtt > 0 && rtt <= UINT_MAX) {
1505 /* Set rtt to be the simple smoothed average. */
1506 if (srv->shc_rtt == 0) {
1507 srv->shc_rtt = rtt;
1508 } else {
1509 srv->shc_rtt = 3 * ((srv)->shc_rtt >> 2) +
1510 (rtt >> 2);
1516 switch (srv->shc_state) {
1517 case ilbd_hc_def_pinging:
1518 srv->shc_state = ilbd_hc_probing;
1520 /* Ping is OK, now start the probe. */
1521 ilbd_hc_probe_timer(ilbd_hc_timer_q, srv);
1522 break;
1523 case ilbd_hc_probing:
1524 srv->shc_fail_cnt = 0;
1526 /* Server is dead before, re-enable it. */
1527 if (srv->shc_status == ILB_HCS_UNREACH ||
1528 srv->shc_status == ILB_HCS_DEAD) {
1530 * If enabling the server in kernel fails now,
1531 * hopefully when the timer fires again later, the
1532 * enabling can be done.
1534 if (ilbd_k_Xable_server(&srv->shc_sg_srv->sgs_addr,
1535 srv->shc_hc_rule->hcr_rule->irl_name,
1536 stat_declare_srv_alive) != ILB_STATUS_OK) {
1537 logerr("%s: cannot enable server in kernel: "
1538 " rule %s server %s", __func__,
1539 srv->shc_hc_rule->hcr_rule->irl_name,
1540 srv->shc_sg_srv->sgs_srvID);
1541 } else {
1542 srv->shc_status = ILB_HCS_ALIVE;
1544 } else {
1545 srv->shc_status = ILB_HCS_ALIVE;
1547 if (ilbd_hc_restart_timer(srv->shc_hc, srv) != ILB_STATUS_OK) {
1548 logerr("%s: cannot restart timer: rule %s server %s",
1549 __func__, srv->shc_hc_rule->hcr_rule->irl_name,
1550 srv->shc_sg_srv->sgs_srvID);
1551 ilbd_mark_server_disabled(srv);
1553 break;
1554 default:
1555 logdebug("%s: unknown state", __func__);
1556 break;
1561 * Handle the return event of a child probe fd.
1563 void
1564 ilbd_hc_probe_return(int ev_port, int fd, int port_events,
1565 ilbd_hc_probe_event_t *ev)
1568 * Note that there can be more than one events delivered to us at
1569 * the same time. So we need to check them individually.
1571 if (port_events & POLLRDNORM)
1572 ilbd_hc_child_data(fd, ev);
1574 if (port_events & (POLLHUP|POLLERR)) {
1575 ilbd_hc_child_hup(ev_port, fd, ev);
1576 return;
1580 * Re-associate the fd with the port so that when the child
1581 * exits, we can reap the status.
1583 reassociate_port(ev_port, fd, ev);