8322 nl: misleading-indentation
[unleashed/tickless.git] / usr / src / cmd / cmd-inet / usr.lib / vrrpd / vrrpd.c
blob9269e9b94fa1a1b24b78c655d29c70742c192835
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
27 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
30 #include <sys/types.h>
31 #include <sys/socket.h>
32 #include <sys/sockio.h>
33 #include <sys/sysevent/vrrp.h>
34 #include <sys/sysevent/eventdefs.h>
35 #include <sys/varargs.h>
36 #include <auth_attr.h>
37 #include <ctype.h>
38 #include <fcntl.h>
39 #include <stdlib.h>
40 #include <strings.h>
41 #include <errno.h>
42 #include <unistd.h>
43 #include <zone.h>
44 #include <libsysevent.h>
45 #include <limits.h>
46 #include <locale.h>
47 #include <arpa/inet.h>
48 #include <signal.h>
49 #include <assert.h>
50 #include <ucred.h>
51 #include <bsm/adt.h>
52 #include <bsm/adt_event.h>
53 #include <priv_utils.h>
54 #include <libdllink.h>
55 #include <libdlvnic.h>
56 #include <libipadm.h>
57 #include <pwd.h>
58 #include <libvrrpadm.h>
59 #include <net/route.h>
60 #include "vrrpd_impl.h"
/*
 * A VRRP router can only start participating in the VRRP protocol of a
 * virtual router when all of the following conditions are met:
 *
 * - The VRRP router is enabled (vr->vvr_conf.vvc_enabled is _B_TRUE)
 * - The RX socket is successfully created over the physical interface to
 *   receive the VRRP multicast advertisement. Note that one RX socket can
 *   be shared by several VRRP routers configured over the same physical
 *   interface. (See vrrpd_init_rxsock())
 * - The TX socket is successfully created over the VNIC interface to send
 *   the VRRP advertisement. (See vrrpd_init_txsock())
 * - The primary IP address has been successfully selected over the physical
 *   interface. (See vrrpd_select_primary())
 *
 * If a VRRP router is enabled but the other conditions have not been
 * satisfied, the router stays in the VRRP_STATE_INIT state. Once all of the
 * above conditions are met, the VRRP router transitions to either the
 * VRRP_STATE_MASTER or the VRRP_STATE_BACKUP state, depending on the VRRP
 * protocol.
 */
/*
 * Advance the character pointer p past any leading whitespace. The character
 * is converted to unsigned char before being handed to isspace(), because
 * passing a negative char value (other than EOF) to the <ctype.h> functions
 * is undefined behavior.
 */
#define	skip_whitespace(p)	while (isspace((unsigned char)*(p))) ++(p)
85 #define BUFFSIZE 65536
87 #define VRRPCONF "/etc/inet/vrrp.conf"
89 typedef struct vrrpd_rtsock_s {
90 int vrt_af; /* address family */
91 int vrt_fd; /* socket for the PF_ROUTE msg */
92 iu_event_id_t vrt_eid; /* event ID */
93 } vrrpd_rtsock_t;
95 static ipadm_handle_t vrrp_ipadm_handle = NULL; /* libipadm handle */
96 static int vrrp_logflag = 0;
97 boolean_t vrrp_debug_level = 0;
98 iu_eh_t *vrrpd_eh = NULL;
99 iu_tq_t *vrrpd_timerq = NULL;
100 static vrrp_handle_t vrrpd_vh = NULL;
101 static int vrrpd_cmdsock_fd = -1; /* socket to communicate */
102 /* between vrrpd/libvrrpadm */
103 static iu_event_id_t vrrpd_cmdsock_eid = -1;
104 static int vrrpd_ctlsock_fd = -1; /* socket to bring up/down */
105 /* the virtual IP addresses */
106 static int vrrpd_ctlsock6_fd = -1;
107 static vrrpd_rtsock_t vrrpd_rtsocks[2] = {
108 {AF_INET, -1, -1},
109 {AF_INET6, -1, -1}
111 static iu_timer_id_t vrrp_scan_timer_id = -1;
113 TAILQ_HEAD(vrrp_vr_list_s, vrrp_vr_s);
114 TAILQ_HEAD(vrrp_intf_list_s, vrrp_intf_s);
115 static struct vrrp_vr_list_s vrrp_vr_list;
116 static struct vrrp_intf_list_s vrrp_intf_list;
117 static char vrrpd_conffile[MAXPATHLEN];
120 * Multicast address of VRRP advertisement in network byte order
122 static vrrp_addr_t vrrp_muladdr4;
123 static vrrp_addr_t vrrp_muladdr6;
125 static int vrrpd_scan_interval = 20000; /* ms */
126 static int pfds[2];
129 * macros to calculate skew_time and master_down_timer
131 * Note that the input is in centisecs and output are in msecs
133 #define SKEW_TIME(pri, intv) ((intv) * (256 - (pri)) / 256)
134 #define MASTER_DOWN_INTERVAL(pri, intv) (3 * (intv) + SKEW_TIME((pri), (intv)))
136 #define SKEW_TIME_VR(vr) \
137 SKEW_TIME((vr)->vvr_conf.vvc_pri, (vr)->vvr_master_adver_int)
138 #define MASTER_DOWN_INTERVAL_VR(vr) \
139 MASTER_DOWN_INTERVAL((vr)->vvr_conf.vvc_pri, (vr)->vvr_master_adver_int)
141 #define VRRP_CONF_UPDATE 0x01
142 #define VRRP_CONF_DELETE 0x02
144 static char *af_str(int);
146 static iu_tq_callback_t vrrp_adv_timeout;
147 static iu_tq_callback_t vrrp_b2m_timeout;
148 static iu_eh_callback_t vrrpd_sock_handler;
149 static iu_eh_callback_t vrrpd_rtsock_handler;
150 static iu_eh_callback_t vrrpd_cmdsock_handler;
152 static int daemon_init();
154 static vrrp_err_t vrrpd_init();
155 static void vrrpd_fini();
156 static vrrp_err_t vrrpd_cmdsock_create();
157 static void vrrpd_cmdsock_destroy();
158 static vrrp_err_t vrrpd_rtsock_create();
159 static void vrrpd_rtsock_destroy();
160 static vrrp_err_t vrrpd_ctlsock_create();
161 static void vrrpd_ctlsock_destroy();
163 static void vrrpd_scan_timer(iu_tq_t *, void *);
164 static void vrrpd_scan(int);
165 static vrrp_err_t vrrpd_init_rxsock(vrrp_vr_t *);
166 static void vrrpd_fini_rxsock(vrrp_vr_t *);
167 static vrrp_err_t vrrpd_init_txsock(vrrp_vr_t *);
168 static vrrp_err_t vrrpd_init_txsock_v4(vrrp_vr_t *);
169 static vrrp_err_t vrrpd_init_txsock_v6(vrrp_vr_t *);
170 static void vrrpd_fini_txsock(vrrp_vr_t *);
172 static vrrp_err_t vrrpd_create_vr(vrrp_vr_conf_t *);
173 static vrrp_err_t vrrpd_enable_vr(vrrp_vr_t *);
174 static void vrrpd_disable_vr(vrrp_vr_t *, vrrp_intf_t *, boolean_t);
175 static void vrrpd_delete_vr(vrrp_vr_t *);
177 static vrrp_err_t vrrpd_create(vrrp_vr_conf_t *, boolean_t);
178 static vrrp_err_t vrrpd_delete(const char *);
179 static vrrp_err_t vrrpd_enable(const char *, boolean_t);
180 static vrrp_err_t vrrpd_disable(const char *);
181 static vrrp_err_t vrrpd_modify(vrrp_vr_conf_t *, uint32_t);
182 static void vrrpd_list(vrid_t, char *, int, vrrp_ret_list_t *, size_t *);
183 static void vrrpd_query(const char *, vrrp_ret_query_t *, size_t *);
185 static boolean_t vrrp_rd_prop_name(vrrp_vr_conf_t *, const char *);
186 static boolean_t vrrp_rd_prop_vrid(vrrp_vr_conf_t *, const char *);
187 static boolean_t vrrp_rd_prop_af(vrrp_vr_conf_t *, const char *);
188 static boolean_t vrrp_rd_prop_pri(vrrp_vr_conf_t *, const char *);
189 static boolean_t vrrp_rd_prop_adver_int(vrrp_vr_conf_t *, const char *);
190 static boolean_t vrrp_rd_prop_preempt(vrrp_vr_conf_t *, const char *);
191 static boolean_t vrrp_rd_prop_accept(vrrp_vr_conf_t *, const char *);
192 static boolean_t vrrp_rd_prop_ifname(vrrp_vr_conf_t *, const char *);
193 static boolean_t vrrp_rd_prop_enabled(vrrp_vr_conf_t *, const char *);
194 static int vrrp_wt_prop_name(vrrp_vr_conf_t *, char *, size_t);
195 static int vrrp_wt_prop_vrid(vrrp_vr_conf_t *, char *, size_t);
196 static int vrrp_wt_prop_af(vrrp_vr_conf_t *, char *, size_t);
197 static int vrrp_wt_prop_pri(vrrp_vr_conf_t *, char *, size_t);
198 static int vrrp_wt_prop_adver_int(vrrp_vr_conf_t *, char *, size_t);
199 static int vrrp_wt_prop_preempt(vrrp_vr_conf_t *, char *, size_t);
200 static int vrrp_wt_prop_accept(vrrp_vr_conf_t *, char *, size_t);
201 static int vrrp_wt_prop_ifname(vrrp_vr_conf_t *, char *, size_t);
202 static int vrrp_wt_prop_enabled(vrrp_vr_conf_t *, char *, size_t);
204 static void vrrpd_cmd_create(void *, void *, size_t *);
205 static void vrrpd_cmd_delete(void *, void *, size_t *);
206 static void vrrpd_cmd_enable(void *, void *, size_t *);
207 static void vrrpd_cmd_disable(void *, void *, size_t *);
208 static void vrrpd_cmd_modify(void *, void *, size_t *);
209 static void vrrpd_cmd_list(void *, void *, size_t *);
210 static void vrrpd_cmd_query(void *, void *, size_t *);
212 static vrrp_vr_t *vrrpd_lookup_vr_by_vrid(char *, vrid_t vrid_t, int);
213 static vrrp_vr_t *vrrpd_lookup_vr_by_name(const char *);
214 static vrrp_intf_t *vrrpd_lookup_if(const char *, int);
215 static vrrp_err_t vrrpd_create_if(const char *, int, uint32_t, vrrp_intf_t **);
216 static void vrrpd_delete_if(vrrp_intf_t *, boolean_t);
217 static vrrp_err_t vrrpd_create_ip(vrrp_intf_t *, const char *, vrrp_addr_t *,
218 uint64_t flags);
219 static void vrrpd_delete_ip(vrrp_intf_t *, vrrp_ip_t *);
221 static void vrrpd_init_ipcache(int);
222 static void vrrpd_update_ipcache(int);
223 static ipadm_status_t vrrpd_walk_addr_info(int);
224 static vrrp_err_t vrrpd_add_ipaddr(char *, int, vrrp_addr_t *,
225 int, uint64_t);
226 static vrrp_ip_t *vrrpd_select_primary(vrrp_intf_t *);
227 static void vrrpd_reselect_primary(vrrp_intf_t *);
228 static void vrrpd_reenable_all_vr();
229 static void vrrpd_remove_if(vrrp_intf_t *, boolean_t);
231 static uint16_t in_cksum(int, uint16_t, void *);
232 static uint16_t vrrp_cksum4(struct in_addr *, struct in_addr *,
233 uint16_t, vrrp_pkt_t *);
234 static uint16_t vrrp_cksum6(struct in6_addr *, struct in6_addr *,
235 uint16_t, vrrp_pkt_t *);
236 static size_t vrrpd_build_vrrp(vrrp_vr_t *, uchar_t *, int, boolean_t);
238 static void vrrpd_process_adv(vrrp_vr_t *, vrrp_addr_t *, vrrp_pkt_t *);
239 static vrrp_err_t vrrpd_send_adv(vrrp_vr_t *, boolean_t);
241 /* state transition functions */
242 static vrrp_err_t vrrpd_state_i2m(vrrp_vr_t *);
243 static vrrp_err_t vrrpd_state_i2b(vrrp_vr_t *);
244 static void vrrpd_state_m2i(vrrp_vr_t *);
245 static void vrrpd_state_b2i(vrrp_vr_t *);
246 static vrrp_err_t vrrpd_state_b2m(vrrp_vr_t *);
247 static vrrp_err_t vrrpd_state_m2b(vrrp_vr_t *);
248 static void vrrpd_state_trans(vrrp_state_t, vrrp_state_t, vrrp_vr_t *);
250 static vrrp_err_t vrrpd_set_noaccept(vrrp_vr_t *, boolean_t);
251 static vrrp_err_t vrrpd_virtualip_update(vrrp_vr_t *, boolean_t);
252 static vrrp_err_t vrrpd_virtualip_updateone(vrrp_intf_t *, vrrp_ip_t *,
253 boolean_t);
254 static int vrrpd_post_event(const char *, vrrp_state_t, vrrp_state_t);
256 static void vrrpd_initconf();
257 static vrrp_err_t vrrpd_updateconf(vrrp_vr_conf_t *, uint_t);
258 static vrrp_err_t vrrpd_write_vrconf(char *, size_t, vrrp_vr_conf_t *);
259 static vrrp_err_t vrrpd_read_vrconf(char *, vrrp_vr_conf_t *);
260 static vrrp_err_t vrrpd_readprop(const char *, vrrp_vr_conf_t *);
261 static void vrrpd_cleanup();
263 static void vrrp_log(int, char *, ...);
264 static int timeval_to_milli(struct timeval);
265 static struct timeval timeval_delta(struct timeval, struct timeval);
267 typedef struct vrrpd_prop_s {
268 char *vs_propname;
269 boolean_t (*vs_propread)(vrrp_vr_conf_t *, const char *);
270 int (*vs_propwrite)(vrrp_vr_conf_t *, char *, size_t);
271 } vrrp_prop_t;
274 * persistent VRRP properties array
276 static vrrp_prop_t vrrp_prop_info_tbl[] = {
277 {"name", vrrp_rd_prop_name, vrrp_wt_prop_name},
278 {"vrid", vrrp_rd_prop_vrid, vrrp_wt_prop_vrid},
279 {"priority", vrrp_rd_prop_pri, vrrp_wt_prop_pri},
280 {"adv_intval", vrrp_rd_prop_adver_int, vrrp_wt_prop_adver_int},
281 {"preempt_mode", vrrp_rd_prop_preempt, vrrp_wt_prop_preempt},
282 {"accept_mode", vrrp_rd_prop_accept, vrrp_wt_prop_accept},
283 {"interface", vrrp_rd_prop_ifname, vrrp_wt_prop_ifname},
284 {"af", vrrp_rd_prop_af, vrrp_wt_prop_af},
285 {"enabled", vrrp_rd_prop_enabled, vrrp_wt_prop_enabled}
288 #define VRRP_PROP_INFO_TABSIZE \
289 (sizeof (vrrp_prop_info_tbl) / sizeof (vrrp_prop_t))
291 typedef void vrrp_cmd_func_t(void *, void *, size_t *);
293 typedef struct vrrp_cmd_info_s {
294 vrrp_cmd_type_t vi_cmd;
295 size_t vi_reqsize;
296 size_t vi_acksize; /* 0 if the size is variable */
297 boolean_t vi_setop; /* Set operation? Check credentials */
298 vrrp_cmd_func_t *vi_cmdfunc;
299 } vrrp_cmd_info_t;
301 static vrrp_cmd_info_t vrrp_cmd_info_tbl[] = {
302 {VRRP_CMD_CREATE, sizeof (vrrp_cmd_create_t),
303 sizeof (vrrp_ret_create_t), _B_TRUE, vrrpd_cmd_create},
304 {VRRP_CMD_DELETE, sizeof (vrrp_cmd_delete_t),
305 sizeof (vrrp_ret_delete_t), _B_TRUE, vrrpd_cmd_delete},
306 {VRRP_CMD_ENABLE, sizeof (vrrp_cmd_enable_t),
307 sizeof (vrrp_ret_enable_t), _B_TRUE, vrrpd_cmd_enable},
308 {VRRP_CMD_DISABLE, sizeof (vrrp_cmd_disable_t),
309 sizeof (vrrp_ret_disable_t), _B_TRUE, vrrpd_cmd_disable},
310 {VRRP_CMD_MODIFY, sizeof (vrrp_cmd_modify_t),
311 sizeof (vrrp_ret_modify_t), _B_TRUE, vrrpd_cmd_modify},
312 {VRRP_CMD_QUERY, sizeof (vrrp_cmd_query_t), 0,
313 _B_FALSE, vrrpd_cmd_query},
314 {VRRP_CMD_LIST, sizeof (vrrp_cmd_list_t), 0,
315 _B_FALSE, vrrpd_cmd_list}
318 #define VRRP_DOOR_INFO_TABLE_SIZE \
319 (sizeof (vrrp_cmd_info_tbl) / sizeof (vrrp_cmd_info_t))
321 static int
322 ipaddr_cmp(int af, vrrp_addr_t *addr1, vrrp_addr_t *addr2)
324 if (af == AF_INET) {
325 return (memcmp(&addr1->in4.sin_addr,
326 &addr2->in4.sin_addr, sizeof (struct in_addr)));
327 } else {
328 return (memcmp(&addr1->in6.sin6_addr,
329 &addr2->in6.sin6_addr, sizeof (struct in6_addr)));
333 static vrrp_vr_t *
334 vrrpd_lookup_vr_by_vrid(char *ifname, vrid_t vrid, int af)
336 vrrp_vr_t *vr;
338 TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
339 if (strcmp(vr->vvr_conf.vvc_link, ifname) == 0 &&
340 vr->vvr_conf.vvc_vrid == vrid &&
341 vr->vvr_conf.vvc_af == af) {
342 break;
345 return (vr);
348 static vrrp_vr_t *
349 vrrpd_lookup_vr_by_name(const char *name)
351 vrrp_vr_t *vr;
353 TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
354 if (strcmp(vr->vvr_conf.vvc_name, name) == 0)
355 break;
357 return (vr);
360 static vrrp_intf_t *
361 vrrpd_lookup_if(const char *ifname, int af)
363 vrrp_intf_t *intf;
365 TAILQ_FOREACH(intf, &vrrp_intf_list, vvi_next) {
366 if (strcmp(ifname, intf->vvi_ifname) == 0 &&
367 af == intf->vvi_af) {
368 break;
371 return (intf);
374 static vrrp_err_t
375 vrrpd_create_if(const char *ifname, int af, uint32_t ifindex,
376 vrrp_intf_t **intfp)
378 vrrp_intf_t *intf;
380 vrrp_log(VRRP_DBG0, "vrrpd_create_if(%s, %s, %d)",
381 ifname, af_str(af), ifindex);
383 if (((*intfp) = malloc(sizeof (vrrp_intf_t))) == NULL) {
384 vrrp_log(VRRP_ERR, "vrrpd_create_if(): failed to "
385 "allocate %s/%s interface", ifname, af_str(af));
386 return (VRRP_ENOMEM);
389 intf = *intfp;
390 TAILQ_INIT(&intf->vvi_iplist);
391 (void) strlcpy(intf->vvi_ifname, ifname, sizeof (intf->vvi_ifname));
392 intf->vvi_af = af;
393 intf->vvi_sockfd = -1;
394 intf->vvi_nvr = 0;
395 intf->vvi_eid = -1;
396 intf->vvi_pip = NULL;
397 intf->vvi_ifindex = ifindex;
398 intf->vvi_state = NODE_STATE_NEW;
399 intf->vvi_vr_state = VRRP_STATE_INIT;
400 TAILQ_INSERT_TAIL(&vrrp_intf_list, intf, vvi_next);
401 return (VRRP_SUCCESS);
405 * An interface is deleted. If update_vr is true, the deletion of the interface
406 * may cause the state transition of assoicated VRRP router (if this interface
407 * is either the primary or the VNIC interface of the VRRP router); otherwise,
408 * simply delete the interface without updating the VRRP router.
410 static void
411 vrrpd_delete_if(vrrp_intf_t *intf, boolean_t update_vr)
413 vrrp_ip_t *ip;
415 vrrp_log(VRRP_DBG0, "vrrpd_delete_if(%s, %s, %supdate_vr)",
416 intf->vvi_ifname, af_str(intf->vvi_af), update_vr ? "" : "no_");
418 if (update_vr) {
420 * If a this interface is the physical interface or the VNIC
421 * of a VRRP router, the deletion of the interface (no IP
422 * address exists on this interface) may cause the state
423 * transition of the VRRP router. call vrrpd_remove_if()
424 * to find all corresponding VRRP router and update their
425 * states.
427 vrrpd_remove_if(intf, _B_FALSE);
431 * First remove and delete all the IP addresses on the interface
433 while (!TAILQ_EMPTY(&intf->vvi_iplist)) {
434 ip = TAILQ_FIRST(&intf->vvi_iplist);
435 vrrpd_delete_ip(intf, ip);
439 * Then remove and delete the interface
441 TAILQ_REMOVE(&vrrp_intf_list, intf, vvi_next);
442 (void) free(intf);
445 static vrrp_err_t
446 vrrpd_create_ip(vrrp_intf_t *intf, const char *lifname, vrrp_addr_t *addr,
447 uint64_t flags)
449 vrrp_ip_t *ip;
450 char abuf[INET6_ADDRSTRLEN];
452 /* LINTED E_CONSTANT_CONDITION */
453 VRRPADDR2STR(intf->vvi_af, addr, abuf, INET6_ADDRSTRLEN, _B_FALSE);
454 vrrp_log(VRRP_DBG0, "vrrpd_create_ip(%s, %s, %s, 0x%x)",
455 intf->vvi_ifname, lifname, abuf, flags);
457 if ((ip = malloc(sizeof (vrrp_ip_t))) == NULL) {
458 vrrp_log(VRRP_ERR, "vrrpd_create_ip(%s, %s):"
459 "failed to allocate IP", lifname, abuf);
460 return (VRRP_ENOMEM);
463 (void) strncpy(ip->vip_lifname, lifname, sizeof (ip->vip_lifname));
464 ip->vip_state = NODE_STATE_NEW;
465 ip->vip_flags = flags;
466 (void) memcpy(&ip->vip_addr, addr, sizeof (ip->vip_addr));
469 * Make sure link-local IPv6 IP addresses are at the head of the list
471 if (intf->vvi_af == AF_INET6 &&
472 IN6_IS_ADDR_LINKLOCAL(&addr->in6.sin6_addr)) {
473 TAILQ_INSERT_HEAD(&intf->vvi_iplist, ip, vip_next);
474 } else {
475 TAILQ_INSERT_TAIL(&intf->vvi_iplist, ip, vip_next);
477 return (VRRP_SUCCESS);
480 static void
481 vrrpd_delete_ip(vrrp_intf_t *intf, vrrp_ip_t *ip)
483 char abuf[INET6_ADDRSTRLEN];
484 int af = intf->vvi_af;
486 /* LINTED E_CONSTANT_CONDITION */
487 VRRPADDR2STR(af, &ip->vip_addr, abuf, sizeof (abuf), _B_FALSE);
488 vrrp_log(VRRP_DBG0, "vrrpd_delete_ip(%s, %s, %s) is %sprimary",
489 intf->vvi_ifname, ip->vip_lifname, abuf,
490 intf->vvi_pip == ip ? "" : "not ");
492 if (intf->vvi_pip == ip)
493 intf->vvi_pip = NULL;
495 TAILQ_REMOVE(&intf->vvi_iplist, ip, vip_next);
496 (void) free(ip);
499 static char *
500 rtm_event2str(uchar_t event)
502 switch (event) {
503 case RTM_NEWADDR:
504 return ("RTM_NEWADDR");
505 case RTM_DELADDR:
506 return ("RTM_DELADDR");
507 case RTM_IFINFO:
508 return ("RTM_IFINFO");
509 case RTM_ADD:
510 return ("RTM_ADD");
511 case RTM_DELETE:
512 return ("RTM_DELETE");
513 case RTM_CHANGE:
514 return ("RTM_CHANGE");
515 case RTM_OLDADD:
516 return ("RTM_OLDADD");
517 case RTM_OLDDEL:
518 return ("RTM_OLDDEL");
519 case RTM_CHGADDR:
520 return ("RTM_CHGADDR");
521 case RTM_FREEADDR:
522 return ("RTM_FREEADDR");
523 default:
524 return ("RTM_OTHER");
529 * This is called by the child process to inform the parent process to
530 * exit with the given return value. Note that the child process
531 * (the daemon process) informs the parent process to exit when anything
532 * goes wrong or when all the intialization is done.
534 static int
535 vrrpd_inform_parent_exit(int rv)
537 int err = 0;
540 * If vrrp_debug_level is none-zero, vrrpd is not running as
541 * a daemon. Return directly.
543 if (vrrp_debug_level != 0)
544 return (0);
546 if (write(pfds[1], &rv, sizeof (int)) != sizeof (int)) {
547 err = errno;
548 (void) close(pfds[1]);
549 return (err);
551 (void) close(pfds[1]);
552 return (0);
556 main(int argc, char *argv[])
558 int c, err;
559 struct sigaction sa;
560 sigset_t mask;
561 struct rlimit rl;
563 (void) setlocale(LC_ALL, "");
564 (void) textdomain(TEXT_DOMAIN);
567 * We need PRIV_SYS_CONFIG to post VRRP sysevent, PRIV_NET_RAWACESS
568 * and PRIV_NET_ICMPACCESS to open the raw socket, PRIV_SYS_IP_CONFIG
569 * to bring up/down the virtual IP addresses, and PRIV_SYS_RESOURCE to
570 * setrlimit().
572 * Note that sysevent is not supported in non-global zones.
574 if (getzoneid() == GLOBAL_ZONEID) {
575 err = __init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET, 0, 0,
576 PRIV_SYS_CONFIG, PRIV_NET_RAWACCESS, PRIV_NET_ICMPACCESS,
577 PRIV_SYS_IP_CONFIG, PRIV_SYS_RESOURCE, NULL);
578 } else {
579 err = __init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET, 0, 0,
580 PRIV_NET_RAWACCESS, PRIV_NET_ICMPACCESS,
581 PRIV_SYS_IP_CONFIG, PRIV_SYS_RESOURCE, NULL);
584 if (err == -1) {
585 vrrp_log(VRRP_ERR, "main(): init_daemon_priv() failed");
586 return (EXIT_FAILURE);
590 * If vrrpd is started by other process, it will inherit the
591 * signal block mask. We unblock all signals to make sure the
592 * signal handling will work normally.
594 (void) sigfillset(&mask);
595 (void) thr_sigsetmask(SIG_UNBLOCK, &mask, NULL);
596 sa.sa_handler = vrrpd_cleanup;
597 sa.sa_flags = 0;
598 (void) sigemptyset(&sa.sa_mask);
599 (void) sigaction(SIGINT, &sa, NULL);
600 (void) sigaction(SIGQUIT, &sa, NULL);
601 (void) sigaction(SIGTERM, &sa, NULL);
603 vrrp_debug_level = 0;
604 (void) strlcpy(vrrpd_conffile, VRRPCONF, sizeof (vrrpd_conffile));
605 while ((c = getopt(argc, argv, "d:f:")) != EOF) {
606 switch (c) {
607 case 'd':
608 vrrp_debug_level = atoi(optarg);
609 break;
610 case 'f':
611 (void) strlcpy(vrrpd_conffile, optarg,
612 sizeof (vrrpd_conffile));
613 break;
614 default:
615 break;
619 closefrom(3);
620 if (vrrp_debug_level == 0 && (daemon_init() != 0)) {
621 vrrp_log(VRRP_ERR, "main(): daemon_init() failed");
622 return (EXIT_FAILURE);
625 rl.rlim_cur = RLIM_INFINITY;
626 rl.rlim_max = RLIM_INFINITY;
627 if (setrlimit(RLIMIT_NOFILE, &rl) == -1) {
628 vrrp_log(VRRP_ERR, "main(): setrlimit() failed");
629 goto child_out;
632 if (vrrpd_init() != VRRP_SUCCESS) {
633 vrrp_log(VRRP_ERR, "main(): vrrpd_init() failed");
634 goto child_out;
638 * Get rid of unneeded privileges.
640 __fini_daemon_priv(PRIV_PROC_FORK, PRIV_PROC_EXEC, PRIV_PROC_SESSION,
641 PRIV_FILE_LINK_ANY, PRIV_PROC_INFO, PRIV_SYS_RESOURCE, NULL);
644 * Read the configuration and initialize the existing VRRP
645 * configuration
647 vrrpd_initconf();
650 * Inform the parent process that it can successfully exit.
652 if ((err = vrrpd_inform_parent_exit(EXIT_SUCCESS)) != 0) {
653 vrrpd_cleanup();
654 vrrp_log(VRRP_WARNING, "vrrpd_inform_parent_exit() failed: %s",
655 strerror(err));
656 return (EXIT_FAILURE);
660 * Start the loop to handle the timer and the IO events.
662 switch (iu_handle_events(vrrpd_eh, vrrpd_timerq)) {
663 case -1:
664 vrrp_log(VRRP_ERR, "main(): iu_handle_events() failed "
665 "abnormally");
666 break;
667 default:
668 break;
671 vrrpd_cleanup();
672 return (EXIT_SUCCESS);
674 child_out:
675 (void) vrrpd_inform_parent_exit(EXIT_FAILURE);
676 return (EXIT_FAILURE);
679 static int
680 daemon_init()
682 pid_t pid;
683 int rv;
685 vrrp_log(VRRP_DBG0, "daemon_init()");
687 if (getenv("SMF_FMRI") == NULL) {
688 vrrp_log(VRRP_ERR, "daemon_init(): vrrpd is an smf(5) managed "
689 "service and should not be run from the command line.");
690 return (-1);
694 * Create the pipe used for the child process to inform the parent
695 * process to exit after all initialization is done.
697 if (pipe(pfds) < 0) {
698 vrrp_log(VRRP_ERR, "daemon_init(): pipe() failed: %s",
699 strerror(errno));
700 return (-1);
703 if ((pid = fork()) < 0) {
704 vrrp_log(VRRP_ERR, "daemon_init(): fork() failed: %s",
705 strerror(errno));
706 (void) close(pfds[0]);
707 (void) close(pfds[1]);
708 return (-1);
711 if (pid != 0) { /* Parent */
712 (void) close(pfds[1]);
715 * Read the child process's return value from the pfds.
716 * If the child process exits unexpectedly, read() returns -1.
718 if (read(pfds[0], &rv, sizeof (int)) != sizeof (int)) {
719 vrrp_log(VRRP_ERR, "daemon_init(): child process "
720 "exited unexpectedly %s", strerror(errno));
721 (void) kill(pid, SIGTERM);
722 rv = EXIT_FAILURE;
724 (void) close(pfds[0]);
725 exit(rv);
729 * in child process, became a daemon, and return to main() to continue.
731 (void) close(pfds[0]);
732 (void) chdir("/");
733 (void) setsid();
734 (void) close(0);
735 (void) close(1);
736 (void) close(2);
737 (void) open("/dev/null", O_RDWR, 0);
738 (void) dup2(0, 1);
739 (void) dup2(0, 2);
740 openlog("vrrpd", LOG_PID, LOG_DAEMON);
741 vrrp_logflag = 1;
742 return (0);
745 static vrrp_err_t
746 vrrpd_init()
748 vrrp_err_t err = VRRP_ESYS;
750 vrrp_log(VRRP_DBG0, "vrrpd_init()");
752 TAILQ_INIT(&vrrp_vr_list);
753 TAILQ_INIT(&vrrp_intf_list);
755 if (vrrp_open(&vrrpd_vh) != VRRP_SUCCESS) {
756 vrrp_log(VRRP_ERR, "vrrpd_init(): vrrp_open() failed");
757 goto fail;
760 if ((vrrpd_timerq = iu_tq_create()) == NULL) {
761 vrrp_log(VRRP_ERR, "vrrpd_init(): iu_tq_create() failed");
762 goto fail;
765 if ((vrrpd_eh = iu_eh_create()) == NULL) {
766 vrrp_log(VRRP_ERR, "vrrpd_init(): iu_eh_create() failed");
767 goto fail;
771 * Create the AF_UNIX socket used to communicate with libvrrpadm.
773 * This socket is used to receive the administrative requests and
774 * send back the results.
776 if (vrrpd_cmdsock_create() != VRRP_SUCCESS) {
777 vrrp_log(VRRP_ERR, "vrrpd_init(): vrrpd_cmdsock_create() "
778 "failed");
779 goto fail;
783 * Create the VRRP control socket used to bring up/down the virtual
784 * IP addresses. It is also used to set the IFF_NOACCEPT flag of
785 * the virtual IP addresses.
787 if (vrrpd_ctlsock_create() != VRRP_SUCCESS) {
788 vrrp_log(VRRP_ERR, "vrrpd_init(): vrrpd_ctlsock_create() "
789 "failed");
790 goto fail;
794 * Create the PF_ROUTER socket used to listen to the routing socket
795 * messages and build the interface/IP address list.
797 if (vrrpd_rtsock_create() != VRRP_SUCCESS) {
798 vrrp_log(VRRP_ERR, "vrrpd_init(): vrrpd_rtsock_create() "
799 "failed");
800 goto fail;
803 /* Open the libipadm handle */
804 if (ipadm_open(&vrrp_ipadm_handle, 0) != IPADM_SUCCESS) {
805 vrrp_log(VRRP_ERR, "vrrpd_init(): ipadm_open() failed");
806 goto fail;
810 * Build the list of interfaces and IP addresses. Also, start the time
811 * to scan the interfaces/IP addresses periodically.
813 vrrpd_scan(AF_INET);
814 vrrpd_scan(AF_INET6);
815 if ((vrrp_scan_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
816 vrrpd_scan_interval, vrrpd_scan_timer, NULL)) == -1) {
817 vrrp_log(VRRP_ERR, "vrrpd_init(): start scan_timer failed");
818 goto fail;
822 * Initialize the VRRP multicast address.
824 bzero(&vrrp_muladdr4, sizeof (vrrp_addr_t));
825 vrrp_muladdr4.in4.sin_family = AF_INET;
826 (void) inet_pton(AF_INET, "224.0.0.18", &vrrp_muladdr4.in4.sin_addr);
828 bzero(&vrrp_muladdr6, sizeof (vrrp_addr_t));
829 vrrp_muladdr6.in6.sin6_family = AF_INET6;
830 (void) inet_pton(AF_INET6, "ff02::12", &vrrp_muladdr6.in6.sin6_addr);
832 return (VRRP_SUCCESS);
834 fail:
835 vrrpd_fini();
836 return (err);
839 static void
840 vrrpd_fini()
842 vrrp_log(VRRP_DBG0, "vrrpd_fini()");
844 (void) iu_cancel_timer(vrrpd_timerq, vrrp_scan_timer_id, NULL);
845 vrrp_scan_timer_id = -1;
847 vrrpd_rtsock_destroy();
848 vrrpd_ctlsock_destroy();
849 vrrpd_cmdsock_destroy();
851 if (vrrpd_eh != NULL) {
852 iu_eh_destroy(vrrpd_eh);
853 vrrpd_eh = NULL;
856 if (vrrpd_timerq != NULL) {
857 iu_tq_destroy(vrrpd_timerq);
858 vrrpd_timerq = NULL;
861 vrrp_close(vrrpd_vh);
862 vrrpd_vh = NULL;
863 assert(TAILQ_EMPTY(&vrrp_vr_list));
864 assert(TAILQ_EMPTY(&vrrp_intf_list));
866 ipadm_close(vrrp_ipadm_handle);
869 static void
870 vrrpd_cleanup(void)
872 vrrp_vr_t *vr;
873 vrrp_intf_t *intf;
875 vrrp_log(VRRP_DBG0, "vrrpd_cleanup()");
877 while (!TAILQ_EMPTY(&vrrp_vr_list)) {
878 vr = TAILQ_FIRST(&vrrp_vr_list);
879 vrrpd_delete_vr(vr);
882 while (!TAILQ_EMPTY(&vrrp_intf_list)) {
883 intf = TAILQ_FIRST(&vrrp_intf_list);
884 vrrpd_delete_if(intf, _B_FALSE);
887 vrrpd_fini();
888 closelog();
889 exit(1);
893 * Read the configuration file and initialize all the existing VRRP routers.
895 static void
896 vrrpd_initconf()
898 FILE *fp;
899 char line[LINE_MAX];
900 int linenum = 0;
901 vrrp_vr_conf_t conf;
902 vrrp_err_t err;
904 vrrp_log(VRRP_DBG0, "vrrpd_initconf()");
906 if ((fp = fopen(vrrpd_conffile, "rF")) == NULL) {
907 vrrp_log(VRRP_ERR, "failed to open the configuration file %s",
908 vrrpd_conffile);
909 return;
912 while (fgets(line, sizeof (line), fp) != NULL) {
913 linenum++;
914 conf.vvc_vrid = VRRP_VRID_NONE;
915 if ((err = vrrpd_read_vrconf(line, &conf)) != VRRP_SUCCESS) {
916 vrrp_log(VRRP_ERR, "failed to parse %d line %s",
917 linenum, line);
918 continue;
922 * Blank or comment line
924 if (conf.vvc_vrid == VRRP_VRID_NONE)
925 continue;
928 * No need to update the configuration since the VRRP router
929 * created/enabled based on the existing configuration.
931 if ((err = vrrpd_create(&conf, _B_FALSE)) != VRRP_SUCCESS) {
932 vrrp_log(VRRP_ERR, "VRRP router %s creation failed: "
933 "%s", conf.vvc_name, vrrp_err2str(err));
934 continue;
937 if (conf.vvc_enabled &&
938 ((err = vrrpd_enable(conf.vvc_name, _B_FALSE)) !=
939 VRRP_SUCCESS)) {
940 vrrp_log(VRRP_ERR, "VRRP router %s enable failed: %s",
941 conf.vvc_name, vrrp_err2str(err));
945 (void) fclose(fp);
949 * Create the AF_UNIX socket used to communicate with libvrrpadm.
951 * This socket is used to receive the administrative request and
952 * send back the results.
954 static vrrp_err_t
955 vrrpd_cmdsock_create()
957 iu_event_id_t eid;
958 struct sockaddr_un laddr;
959 int sock, flags;
961 vrrp_log(VRRP_DBG0, "vrrpd_cmdsock_create()");
963 if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
964 vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): socket(AF_UNIX) "
965 "failed: %s", strerror(errno));
966 return (VRRP_ESYS);
970 * Set it to be non-blocking.
972 flags = fcntl(sock, F_GETFL, 0);
973 (void) fcntl(sock, F_SETFL, (flags | O_NONBLOCK));
976 * Unlink first in case a previous daemon instance exited ungracefully.
978 (void) unlink(VRRPD_SOCKET);
980 bzero(&laddr, sizeof (laddr));
981 laddr.sun_family = AF_UNIX;
982 (void) strlcpy(laddr.sun_path, VRRPD_SOCKET, sizeof (laddr.sun_path));
983 if (bind(sock, (struct sockaddr *)&laddr, sizeof (laddr)) < 0) {
984 vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): bind() failed: %s",
985 strerror(errno));
986 (void) close(sock);
987 return (VRRP_ESYS);
990 if (listen(sock, 30) < 0) {
991 vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): listen() "
992 "failed: %s", strerror(errno));
993 (void) close(sock);
994 return (VRRP_ESYS);
997 if ((eid = iu_register_event(vrrpd_eh, sock, POLLIN,
998 vrrpd_cmdsock_handler, NULL)) == -1) {
999 vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): iu_register_event()"
1000 " failed");
1001 (void) close(sock);
1002 return (VRRP_ESYS);
1005 vrrpd_cmdsock_fd = sock;
1006 vrrpd_cmdsock_eid = eid;
1007 return (VRRP_SUCCESS);
1010 static void
1011 vrrpd_cmdsock_destroy()
1013 vrrp_log(VRRP_DBG0, "vrrpd_cmdsock_destroy()");
1015 (void) iu_unregister_event(vrrpd_eh, vrrpd_cmdsock_eid, NULL);
1016 (void) close(vrrpd_cmdsock_fd);
1017 vrrpd_cmdsock_fd = -1;
1018 vrrpd_cmdsock_eid = -1;
1022 * Create the PF_ROUTER sockets used to listen to the routing socket
1023 * messages and build the interface/IP address list. Create one for
1024 * each address family (IPv4 and IPv6).
1026 static vrrp_err_t
1027 vrrpd_rtsock_create()
1029 int i, flags, sock;
1030 iu_event_id_t eid;
1032 vrrp_log(VRRP_DBG0, "vrrpd_rtsock_create()");
1034 for (i = 0; i < 2; i++) {
1035 sock = socket(PF_ROUTE, SOCK_RAW, vrrpd_rtsocks[i].vrt_af);
1036 if (sock == -1) {
1037 vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): socket() "
1038 "failed: %s", strerror(errno));
1039 break;
1043 * Set it to be non-blocking.
1045 if ((flags = fcntl(sock, F_GETFL, 0)) < 0) {
1046 vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): "
1047 "fcntl(F_GETFL) failed: %s", strerror(errno));
1048 break;
1051 if ((fcntl(sock, F_SETFL, flags | O_NONBLOCK)) < 0) {
1052 vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): "
1053 "fcntl(F_SETFL) failed: %s", strerror(errno));
1054 break;
1057 if ((eid = iu_register_event(vrrpd_eh, sock, POLLIN,
1058 vrrpd_rtsock_handler, &(vrrpd_rtsocks[i].vrt_af))) == -1) {
1059 vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): register "
1060 "rtsock %d(%s) failed", sock,
1061 af_str(vrrpd_rtsocks[i].vrt_af));
1062 break;
1065 vrrpd_rtsocks[i].vrt_fd = sock;
1066 vrrpd_rtsocks[i].vrt_eid = eid;
1069 if (i != 2) {
1070 (void) close(sock);
1071 vrrpd_rtsock_destroy();
1072 return (VRRP_ESYS);
1075 return (VRRP_SUCCESS);
1078 static void
1079 vrrpd_rtsock_destroy()
1081 int i;
1083 vrrp_log(VRRP_DBG0, "vrrpd_rtsock_destroy()");
1084 for (i = 0; i < 2; i++) {
1085 (void) iu_unregister_event(vrrpd_eh, vrrpd_rtsocks[i].vrt_eid,
1086 NULL);
1087 (void) close(vrrpd_rtsocks[i].vrt_fd);
1088 vrrpd_rtsocks[i].vrt_eid = -1;
1089 vrrpd_rtsocks[i].vrt_fd = -1;
1094 * Create the VRRP control socket used to bring up/down the virtual
1095 * IP addresses. It is also used to set the IFF_NOACCEPT flag of
1096 * the virtual IP addresses.
1098 static vrrp_err_t
1099 vrrpd_ctlsock_create()
1101 int s, s6;
1102 int on = _B_TRUE;
1104 if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
1105 vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): socket(INET) "
1106 "failed: %s", strerror(errno));
1107 return (VRRP_ESYS);
1109 if (setsockopt(s, SOL_SOCKET, SO_VRRP, &on, sizeof (on)) < 0) {
1110 vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): "
1111 "setsockopt(INET, SO_VRRP) failed: %s", strerror(errno));
1112 (void) close(s);
1113 return (VRRP_ESYS);
1116 if ((s6 = socket(AF_INET6, SOCK_DGRAM, 0)) < 0) {
1117 vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): socket(INET6) "
1118 "failed: %s", strerror(errno));
1119 (void) close(s);
1120 return (VRRP_ESYS);
1122 if (setsockopt(s6, SOL_SOCKET, SO_VRRP, &on, sizeof (on)) < 0) {
1123 vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): "
1124 "setsockopt(INET6, SO_VRRP) failed: %s", strerror(errno));
1125 (void) close(s);
1126 (void) close(s6);
1127 return (VRRP_ESYS);
1130 vrrpd_ctlsock_fd = s;
1131 vrrpd_ctlsock6_fd = s6;
1132 return (VRRP_SUCCESS);
1135 static void
1136 vrrpd_ctlsock_destroy()
1138 (void) close(vrrpd_ctlsock_fd);
1139 vrrpd_ctlsock_fd = -1;
1140 (void) close(vrrpd_ctlsock6_fd);
1141 vrrpd_ctlsock6_fd = -1;
1144 /*ARGSUSED*/
1145 static void
1146 vrrpd_cmd_create(void *arg1, void *arg2, size_t *arg2_sz)
1148 vrrp_cmd_create_t *cmd = (vrrp_cmd_create_t *)arg1;
1149 vrrp_ret_create_t *ret = (vrrp_ret_create_t *)arg2;
1150 vrrp_err_t err;
1152 err = vrrpd_create(&cmd->vcc_conf, _B_TRUE);
1153 if (err == VRRP_SUCCESS && cmd->vcc_conf.vvc_enabled) {
1155 * No need to update the configuration since it is already
1156 * done in the above vrrpd_create() call
1158 err = vrrpd_enable(cmd->vcc_conf.vvc_name, _B_FALSE);
1159 if (err != VRRP_SUCCESS)
1160 (void) vrrpd_delete(cmd->vcc_conf.vvc_name);
1162 ret->vrc_err = err;
1165 /*ARGSUSED*/
1166 static void
1167 vrrpd_cmd_delete(void *arg1, void *arg2, size_t *arg2_sz)
1169 vrrp_cmd_delete_t *cmd = (vrrp_cmd_delete_t *)arg1;
1170 vrrp_ret_delete_t *ret = (vrrp_ret_delete_t *)arg2;
1172 ret->vrd_err = vrrpd_delete(cmd->vcd_name);
1175 /*ARGSUSED*/
1176 static void
1177 vrrpd_cmd_enable(void *arg1, void *arg2, size_t *arg2_sz)
1179 vrrp_cmd_enable_t *cmd = (vrrp_cmd_enable_t *)arg1;
1180 vrrp_ret_enable_t *ret = (vrrp_ret_enable_t *)arg2;
1182 ret->vrs_err = vrrpd_enable(cmd->vcs_name, _B_TRUE);
1185 /*ARGSUSED*/
1186 static void
1187 vrrpd_cmd_disable(void *arg1, void *arg2, size_t *arg2_sz)
1189 vrrp_cmd_disable_t *cmd = (vrrp_cmd_disable_t *)arg1;
1190 vrrp_ret_disable_t *ret = (vrrp_ret_disable_t *)arg2;
1192 ret->vrx_err = vrrpd_disable(cmd->vcx_name);
1195 /*ARGSUSED*/
1196 static void
1197 vrrpd_cmd_modify(void *arg1, void *arg2, size_t *arg2_sz)
1199 vrrp_cmd_modify_t *cmd = (vrrp_cmd_modify_t *)arg1;
1200 vrrp_ret_modify_t *ret = (vrrp_ret_modify_t *)arg2;
1202 ret->vrm_err = vrrpd_modify(&cmd->vcm_conf, cmd->vcm_mask);
1205 static void
1206 vrrpd_cmd_query(void *arg1, void *arg2, size_t *arg2_sz)
1208 vrrp_cmd_query_t *cmd = (vrrp_cmd_query_t *)arg1;
1210 vrrpd_query(cmd->vcq_name, arg2, arg2_sz);
1213 static void
1214 vrrpd_cmd_list(void *arg1, void *arg2, size_t *arg2_sz)
1216 vrrp_cmd_list_t *cmd = (vrrp_cmd_list_t *)arg1;
1218 vrrpd_list(cmd->vcl_vrid, cmd->vcl_ifname, cmd->vcl_af, arg2, arg2_sz);
1222 * Write-type requeset must have the solaris.network.vrrp authorization.
1224 static boolean_t
1225 vrrp_auth_check(int connfd, vrrp_cmd_info_t *cinfo)
1227 ucred_t *cred = NULL;
1228 uid_t uid;
1229 struct passwd *pw;
1230 boolean_t success = _B_FALSE;
1232 vrrp_log(VRRP_DBG0, "vrrp_auth_check()");
1234 if (!cinfo->vi_setop)
1235 return (_B_TRUE);
1238 * Validate the credential
1240 if (getpeerucred(connfd, &cred) == (uid_t)-1) {
1241 vrrp_log(VRRP_ERR, "vrrp_auth_check(): getpeerucred() "
1242 "failed: %s", strerror(errno));
1243 return (_B_FALSE);
1246 if ((uid = ucred_getruid((const ucred_t *)cred)) == (uid_t)-1) {
1247 vrrp_log(VRRP_ERR, "vrrp_auth_check(): ucred_getruid() "
1248 "failed: %s", strerror(errno));
1249 goto done;
1252 if ((pw = getpwuid(uid)) == NULL) {
1253 vrrp_log(VRRP_ERR, "vrrp_auth_check(): getpwuid() failed");
1254 goto done;
1257 success = (chkauthattr("solaris.network.vrrp", pw->pw_name) == 1);
1259 done:
1260 ucred_free(cred);
1261 return (success);
1265 * Process the administrative request from libvrrpadm
1267 /* ARGSUSED */
1268 static void
1269 vrrpd_cmdsock_handler(iu_eh_t *eh, int s, short events, iu_event_id_t id,
1270 void *arg)
1272 vrrp_cmd_info_t *cinfo = NULL;
1273 vrrp_err_t err = VRRP_SUCCESS;
1274 uchar_t buf[BUFFSIZE], ackbuf[BUFFSIZE];
1275 size_t cursize, acksize, len;
1276 uint32_t cmd;
1277 int connfd, i;
1278 struct sockaddr_in from;
1279 socklen_t fromlen;
1281 vrrp_log(VRRP_DBG0, "vrrpd_cmdsock_handler()");
1283 fromlen = (socklen_t)sizeof (from);
1284 if ((connfd = accept(s, (struct sockaddr *)&from, &fromlen)) < 0) {
1285 vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler() accept(): %s",
1286 strerror(errno));
1287 return;
1291 * First get the type of the request
1293 cursize = 0;
1294 while (cursize < sizeof (uint32_t)) {
1295 len = read(connfd, buf + cursize,
1296 sizeof (uint32_t) - cursize);
1297 if (len == (size_t)-1 && (errno == EAGAIN || errno == EINTR)) {
1298 continue;
1299 } else if (len > 0) {
1300 cursize += len;
1301 continue;
1303 vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): invalid message "
1304 "length");
1305 (void) close(connfd);
1306 return;
1309 /* LINTED E_BAD_PTR_CAST_ALIGN */
1310 cmd = ((vrrp_cmd_t *)buf)->vc_cmd;
1311 for (i = 0; i < VRRP_DOOR_INFO_TABLE_SIZE; i++) {
1312 if (vrrp_cmd_info_tbl[i].vi_cmd == cmd) {
1313 cinfo = vrrp_cmd_info_tbl + i;
1314 break;
1318 if (cinfo == NULL) {
1319 vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): invalid request "
1320 "type %d", cmd);
1321 err = VRRP_EINVAL;
1322 goto done;
1326 * Get the rest of the request.
1328 assert(cursize == sizeof (uint32_t));
1329 while (cursize < cinfo->vi_reqsize) {
1330 len = read(connfd, buf + cursize,
1331 cinfo->vi_reqsize - cursize);
1332 if (len == (size_t)-1 && (errno == EAGAIN || errno == EINTR)) {
1333 continue;
1334 } else if (len > 0) {
1335 cursize += len;
1336 continue;
1338 vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): invalid message "
1339 "length");
1340 err = VRRP_EINVAL;
1341 goto done;
1345 * Validate the authorization
1347 if (!vrrp_auth_check(connfd, cinfo)) {
1348 vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): "
1349 "not sufficient authorization");
1350 err = VRRP_EPERM;
1353 done:
1355 * Ack the request
1357 if (err != 0) {
1358 /* LINTED E_BAD_PTR_CAST_ALIGN */
1359 ((vrrp_ret_t *)ackbuf)->vr_err = err;
1360 acksize = sizeof (vrrp_ret_t);
1361 } else {
1363 * If the size of ack is varied, the cmdfunc callback
1364 * will set the right size.
1366 if ((acksize = cinfo->vi_acksize) == 0)
1367 acksize = sizeof (ackbuf);
1369 /* LINTED E_BAD_PTR_CAST_ALIGN */
1370 cinfo->vi_cmdfunc((vrrp_cmd_t *)buf, ackbuf, &acksize);
1374 * Send the ack back.
1376 cursize = 0;
1377 while (cursize < acksize) {
1378 len = sendto(connfd, ackbuf + cursize, acksize - cursize,
1379 0, (struct sockaddr *)&from, fromlen);
1380 if (len == (size_t)-1 && errno == EAGAIN) {
1381 continue;
1382 } else if (len > 0) {
1383 cursize += len;
1384 continue;
1385 } else {
1386 vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler() failed to "
1387 "ack: %s", strerror(errno));
1388 break;
1392 (void) shutdown(connfd, SHUT_RDWR);
1393 (void) close(connfd);
1397 * Process the routing socket messages and update the interfaces/IP addresses
1398 * list
1400 /* ARGSUSED */
1401 static void
1402 vrrpd_rtsock_handler(iu_eh_t *eh, int s, short events,
1403 iu_event_id_t id, void *arg)
1405 char buf[BUFFSIZE];
1406 struct ifa_msghdr *ifam;
1407 int nbytes;
1408 int af = *(int *)arg;
1409 boolean_t scanif = _B_FALSE;
1411 for (;;) {
1412 nbytes = read(s, buf, sizeof (buf));
1413 if (nbytes <= 0) {
1414 /* No more messages */
1415 break;
1418 /* LINTED E_BAD_PTR_CAST_ALIGN */
1419 ifam = (struct ifa_msghdr *)buf;
1420 if (ifam->ifam_version != RTM_VERSION) {
1421 vrrp_log(VRRP_ERR, "vrrpd_rtsock_handler(): version %d "
1422 "not understood", ifam->ifam_version);
1423 break;
1426 vrrp_log(VRRP_DBG0, "vrrpd_rtsock_handler(): recv %s event",
1427 rtm_event2str(ifam->ifam_type));
1429 switch (ifam->ifam_type) {
1430 case RTM_FREEADDR:
1431 case RTM_CHGADDR:
1432 case RTM_NEWADDR:
1433 case RTM_DELADDR:
1435 * An IP address has been created/updated/deleted or
1436 * brought up/down, re-initilialize the interface/IP
1437 * address list.
1439 scanif = _B_TRUE;
1440 break;
1441 default:
1442 /* Not interesting */
1443 break;
1447 if (scanif)
1448 vrrpd_scan(af);
1452 * Periodically scan the interface/IP addresses on the system.
1454 /* ARGSUSED */
1455 static void
1456 vrrpd_scan_timer(iu_tq_t *tq, void *arg)
1458 vrrp_log(VRRP_DBG0, "vrrpd_scan_timer()");
1459 vrrpd_scan(AF_INET);
1460 vrrpd_scan(AF_INET6);
1464 * Get the list of the interface/IP addresses of the specified address
1465 * family.
1467 static void
1468 vrrpd_scan(int af)
1470 vrrp_log(VRRP_DBG0, "vrrpd_scan(%s)", af_str(af));
1472 again:
1473 vrrpd_init_ipcache(af);
1475 /* If interface index changes, walk again. */
1476 if (vrrpd_walk_addr_info(af) != IPADM_SUCCESS)
1477 goto again;
1479 vrrpd_update_ipcache(af);
1483 * First mark all IP addresses of the specific address family to be removed.
1484 * This flag will then be cleared when we walk up all the IP addresses.
1486 static void
1487 vrrpd_init_ipcache(int af)
1489 vrrp_intf_t *intf, *next_intf;
1490 vrrp_ip_t *ip, *nextip;
1491 char abuf[INET6_ADDRSTRLEN];
1493 vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(%s)", af_str(af));
1495 next_intf = TAILQ_FIRST(&vrrp_intf_list);
1496 while ((intf = next_intf) != NULL) {
1497 next_intf = TAILQ_NEXT(intf, vvi_next);
1498 if (intf->vvi_af != af)
1499 continue;
1502 * If the interface is still marked as new, it means that this
1503 * vrrpd_init_ipcache() call is a result of ifindex change,
1504 * which causes the re-walk of all the interfaces (see
1505 * vrrpd_add_ipaddr()), and some interfaces are still marked
1506 * as new during the last walk. In this case, delete this
1507 * interface with the "update_vr" argument to be _B_FALSE,
1508 * since no VRRP router has been assoicated with this
1509 * interface yet (the association is done in
1510 * vrrpd_update_ipcache()).
1512 * This interface will be re-added later if it still exists.
1514 if (intf->vvi_state == NODE_STATE_NEW) {
1515 vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(): remove %s "
1516 "(%d), may be added later", intf->vvi_ifname,
1517 intf->vvi_ifindex);
1518 vrrpd_delete_if(intf, _B_FALSE);
1519 continue;
1522 for (ip = TAILQ_FIRST(&intf->vvi_iplist); ip != NULL;
1523 ip = nextip) {
1524 nextip = TAILQ_NEXT(ip, vip_next);
1525 /* LINTED E_CONSTANT_CONDITION */
1526 VRRPADDR2STR(af, &ip->vip_addr, abuf,
1527 INET6_ADDRSTRLEN, _B_FALSE);
1529 if (ip->vip_state != NODE_STATE_NEW) {
1530 vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(%s/%d, "
1531 "%s(%s/0x%x))", intf->vvi_ifname,
1532 intf->vvi_ifindex, ip->vip_lifname,
1533 abuf, ip->vip_flags);
1534 ip->vip_state = NODE_STATE_STALE;
1535 continue;
1539 * If the IP is still marked as new, it means that
1540 * this vrrpd_init_ipcache() call is a result of
1541 * ifindex change, which causes the re-walk of all
1542 * the IP addresses (see vrrpd_add_ipaddr()).
1543 * Delete this IP.
1545 * This IP will be readded later if it still exists.
1547 vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(): remove "
1548 "%s/%d , %s(%s)", intf->vvi_ifname,
1549 intf->vvi_ifindex, ip->vip_lifname, abuf);
1550 vrrpd_delete_ip(intf, ip);
1556 * Walk all the IP addresses of the given family and update its
1557 * addresses list. Return IPADM_FAILURE if it is required to walk
1558 * all the interfaces again (one of the interface index changes in between).
1560 static ipadm_status_t
1561 vrrpd_walk_addr_info(int af)
1563 ipadm_addr_info_t *ainfo, *ainfop;
1564 ipadm_status_t ipstatus;
1565 char *lifname;
1566 struct sockaddr_storage stor;
1567 vrrp_addr_t *addr;
1568 int ifindex;
1569 uint64_t flags;
1571 vrrp_log(VRRP_DBG0, "vrrpd_walk_addr_info(%s)", af_str(af));
1573 ipstatus = ipadm_addr_info(vrrp_ipadm_handle, NULL, &ainfo, 0, 0);
1574 if (ipstatus != IPADM_SUCCESS) {
1575 vrrp_log(VRRP_ERR, "vrrpd_walk_addr_info(%s): "
1576 "ipadm_addr_info() failed: %s",
1577 af_str(af), ipadm_status2str(ipstatus));
1578 return (IPADM_SUCCESS);
1581 for (ainfop = ainfo; ainfop != NULL; ainfop = IA_NEXT(ainfop)) {
1582 if (ainfop->ia_ifa.ifa_addr->sa_family != af)
1583 continue;
1585 lifname = ainfop->ia_ifa.ifa_name;
1586 flags = ainfop->ia_ifa.ifa_flags;
1587 (void) memcpy(&stor, ainfop->ia_ifa.ifa_addr, sizeof (stor));
1588 addr = (vrrp_addr_t *)&stor;
1590 vrrp_log(VRRP_DBG0, "vrrpd_walk_addr_info(%s): %s",
1591 af_str(af), lifname);
1593 /* Skip virtual/IPMP/P2P interfaces */
1594 if (flags & (IFF_VIRTUAL|IFF_IPMP|IFF_POINTOPOINT)) {
1595 vrrp_log(VRRP_DBG0, "vrrpd_walk_addr_info(%s): "
1596 "skipped %s", af_str(af), lifname);
1597 continue;
1600 /* Filter out the all-zero IP address */
1601 if (VRRPADDR_UNSPECIFIED(af, addr))
1602 continue;
1604 if ((ifindex = if_nametoindex(lifname)) == 0) {
1605 if (errno != ENXIO && errno != ENOENT) {
1606 vrrp_log(VRRP_ERR, "vrrpd_walk_addr_info(%s): "
1607 "if_nametoindex() failed for %s: %s",
1608 af_str(af), lifname, strerror(errno));
1610 break;
1614 * The interface is unplumbed/replumbed during the walk. Try
1615 * to walk the IP addresses one more time.
1617 if (vrrpd_add_ipaddr(lifname, af, addr, ifindex, flags)
1618 == VRRP_EAGAIN) {
1619 ipstatus = IPADM_FAILURE;
1620 break;
1624 ipadm_free_addr_info(ainfo);
1625 return (ipstatus);
1629 * Given the information of each IP address, update the interface and
1630 * IP addresses list
1632 static vrrp_err_t
1633 vrrpd_add_ipaddr(char *lifname, int af, vrrp_addr_t *addr, int ifindex,
1634 uint64_t flags)
1636 char ifname[LIFNAMSIZ], *c;
1637 vrrp_intf_t *intf;
1638 vrrp_ip_t *ip;
1639 char abuf[INET6_ADDRSTRLEN];
1640 vrrp_err_t err;
1642 /* LINTED E_CONSTANT_CONDITION */
1643 VRRPADDR2STR(af, addr, abuf, INET6_ADDRSTRLEN, _B_FALSE);
1644 vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s, %s, %d, 0x%x)", lifname,
1645 abuf, ifindex, flags);
1648 * Get the physical interface name from the logical interface name.
1650 (void) strlcpy(ifname, lifname, sizeof (ifname));
1651 if ((c = strchr(ifname, ':')) != NULL)
1652 *c = '\0';
1654 if ((intf = vrrpd_lookup_if(ifname, af)) == NULL) {
1655 vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(): %s is new", ifname);
1656 err = vrrpd_create_if(ifname, af, ifindex, &intf);
1657 if (err != VRRP_SUCCESS)
1658 return (err);
1659 } else if (intf->vvi_ifindex != ifindex) {
1661 * If index changes, it means that this interface is
1662 * unplumbed/replumbed since we last checked. If this
1663 * interface is not used by any VRRP router, just
1664 * update its ifindex, and the IP addresses list will
1665 * be updated later. Otherwise, return EAGAIN to rewalk
1666 * all the IP addresses from the beginning.
1668 vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s) ifindex changed ",
1669 "from %d to %d", ifname, intf->vvi_ifindex, ifindex);
1670 if (!IS_PRIMARY_INTF(intf) && !IS_VIRTUAL_INTF(intf)) {
1671 intf->vvi_ifindex = ifindex;
1672 } else {
1674 * delete this interface from the list if this
1675 * interface has already been assoicated with
1676 * any VRRP routers.
1678 vrrpd_delete_if(intf, _B_TRUE);
1679 return (VRRP_EAGAIN);
1684 * Does this IP address already exist?
1686 TAILQ_FOREACH(ip, &intf->vvi_iplist, vip_next) {
1687 if (strcmp(ip->vip_lifname, lifname) == 0)
1688 break;
1691 if (ip != NULL) {
1692 vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s, %s) IP exists",
1693 lifname, abuf);
1694 ip->vip_state = NODE_STATE_NONE;
1695 ip->vip_flags = flags;
1696 if (ipaddr_cmp(af, addr, &ip->vip_addr) != 0) {
1698 * Address has been changed, mark it as new
1699 * If this address is already selected as the
1700 * primary IP address, the new IP will be checked
1701 * to see whether it is still qualified as the
1702 * primary IP address. If not, the primary IP
1703 * address will be reselected.
1705 (void) memcpy(&ip->vip_addr, addr,
1706 sizeof (vrrp_addr_t));
1708 ip->vip_state = NODE_STATE_NEW;
1710 } else {
1711 vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s, %s) IP is new",
1712 lifname, abuf);
1714 err = vrrpd_create_ip(intf, lifname, addr, flags);
1715 if (err != VRRP_SUCCESS)
1716 return (err);
1718 return (VRRP_SUCCESS);
1722 * Update the interface and IP addresses list. Remove the ones that have been
1723 * staled since last time we walk the IP addresses and updated the ones that
1724 * have been changed.
1726 static void
1727 vrrpd_update_ipcache(int af)
1729 vrrp_intf_t *intf, *nextif;
1730 vrrp_ip_t *ip, *nextip;
1731 char abuf[INET6_ADDRSTRLEN];
1732 boolean_t primary_selected;
1733 boolean_t primary_now_selected;
1734 boolean_t need_reenable = _B_FALSE;
1736 vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(%s)", af_str(af));
1738 nextif = TAILQ_FIRST(&vrrp_intf_list);
1739 while ((intf = nextif) != NULL) {
1740 nextif = TAILQ_NEXT(intf, vvi_next);
1741 if (intf->vvi_af != af)
1742 continue;
1745 * Does the interface already select its primary IP address?
1747 primary_selected = (intf->vvi_pip != NULL);
1748 assert(!primary_selected || IS_PRIMARY_INTF(intf));
1751 * Removed the IP addresses that have been unconfigured.
1753 for (ip = TAILQ_FIRST(&intf->vvi_iplist); ip != NULL;
1754 ip = nextip) {
1755 nextip = TAILQ_NEXT(ip, vip_next);
1756 if (ip->vip_state != NODE_STATE_STALE)
1757 continue;
1759 /* LINTED E_CONSTANT_CONDITION */
1760 VRRPADDR2STR(af, &ip->vip_addr, abuf, INET6_ADDRSTRLEN,
1761 _B_FALSE);
1762 vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): IP %s "
1763 "is removed over %s", abuf, intf->vvi_ifname);
1764 vrrpd_delete_ip(intf, ip);
1768 * No IP addresses left, delete this interface.
1770 if (TAILQ_EMPTY(&intf->vvi_iplist)) {
1771 vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): "
1772 "no IP left over %s", intf->vvi_ifname);
1773 vrrpd_delete_if(intf, _B_TRUE);
1774 continue;
1778 * If this is selected ss the physical interface for any
1779 * VRRP router, reselect the primary address if needed.
1781 if (IS_PRIMARY_INTF(intf)) {
1782 vrrpd_reselect_primary(intf);
1783 primary_now_selected = (intf->vvi_pip != NULL);
1786 * Cannot find the new primary IP address.
1788 if (primary_selected && !primary_now_selected) {
1789 vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache() "
1790 "reselect primary IP on %s failed",
1791 intf->vvi_ifname);
1792 vrrpd_remove_if(intf, _B_TRUE);
1793 } else if (!primary_selected && primary_now_selected) {
1795 * The primary IP address is successfully
1796 * selected on the physical interfacew we
1797 * need to walk through all the VRRP routers
1798 * that is created on this physical interface
1799 * and see whether they can now be enabled.
1801 need_reenable = _B_TRUE;
1806 * For every new virtual IP address, bring up/down it based
1807 * on the state of VRRP router.
1809 * Note that it is fine to not update the IP's vip_flags field
1810 * even if vrrpd_virtualip_updateone() changed the address's
1811 * up/down state, since the vip_flags field is only used for
1812 * select primary IP address over a physical interface, and
1813 * vrrpd_virtualip_updateone() only affects the virtual IP
1814 * address's status.
1816 for (ip = TAILQ_FIRST(&intf->vvi_iplist); ip != NULL;
1817 ip = nextip) {
1818 nextip = TAILQ_NEXT(ip, vip_next);
1819 /* LINTED E_CONSTANT_CONDITION */
1820 VRRPADDR2STR(af, &ip->vip_addr, abuf, INET6_ADDRSTRLEN,
1821 _B_FALSE);
1822 vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): "
1823 "IP %s over %s%s", abuf, intf->vvi_ifname,
1824 ip->vip_state == NODE_STATE_NEW ? " is new" : "");
1826 if (IS_VIRTUAL_INTF(intf)) {
1828 * If this IP is new, update its up/down state
1829 * based on the virtual interface's state
1830 * (which is determined by the VRRP router's
1831 * state). Otherwise, check only and prompt
1832 * warnings if its up/down state has been
1833 * changed.
1835 if (vrrpd_virtualip_updateone(intf, ip,
1836 ip->vip_state == NODE_STATE_NONE) !=
1837 VRRP_SUCCESS) {
1838 vrrp_log(VRRP_DBG0,
1839 "vrrpd_update_ipcache(): "
1840 "IP %s over %s update failed", abuf,
1841 intf->vvi_ifname);
1842 vrrpd_delete_ip(intf, ip);
1843 continue;
1846 ip->vip_state = NODE_STATE_NONE;
1850 * The IP address is deleted when it is failed to be brought
1851 * up. If no IP addresses are left, delete this interface.
1853 if (TAILQ_EMPTY(&intf->vvi_iplist)) {
1854 vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): "
1855 "no IP left over %s", intf->vvi_ifname);
1856 vrrpd_delete_if(intf, _B_TRUE);
1857 continue;
1860 if (intf->vvi_state == NODE_STATE_NEW) {
1862 * A new interface is found. This interface can be
1863 * the primary interface or the virtual VNIC
1864 * interface. Again, we need to walk throught all
1865 * the VRRP routers to see whether some of them can
1866 * now be enabled because of the new primary IP
1867 * address or the new virtual IP addresses.
1869 intf->vvi_state = NODE_STATE_NONE;
1870 need_reenable = _B_TRUE;
1874 if (need_reenable)
1875 vrrpd_reenable_all_vr();
1879 * Reselect primary IP if:
1880 * - The existing primary IP is no longer qualified (removed or it is down or
1881 * not a link-local IP for IPv6 VRRP router);
1882 * - This is a physical interface but no primary IP is chosen;
1884 static void
1885 vrrpd_reselect_primary(vrrp_intf_t *intf)
1887 vrrp_ip_t *ip;
1888 char abuf[INET6_ADDRSTRLEN];
1890 assert(IS_PRIMARY_INTF(intf));
1893 * If the interface's old primary IP address is still valid, return
1895 if (((ip = intf->vvi_pip) != NULL) && (QUALIFY_PRIMARY_ADDR(intf, ip)))
1896 return;
1898 if (ip != NULL) {
1899 /* LINTED E_CONSTANT_CONDITION */
1900 VRRPADDR2STR(intf->vvi_af, &ip->vip_addr, abuf,
1901 sizeof (abuf), _B_FALSE);
1902 vrrp_log(VRRP_DBG0, "vrrpd_reselect_primary(%s): primary IP %s "
1903 "is no longer qualified", intf->vvi_ifname, abuf);
1906 ip = vrrpd_select_primary(intf);
1907 intf->vvi_pip = ip;
1909 if (ip != NULL) {
1910 /* LINTED E_CONSTANT_CONDITION */
1911 VRRPADDR2STR(intf->vvi_af, &ip->vip_addr, abuf,
1912 sizeof (abuf), _B_FALSE);
1913 vrrp_log(VRRP_DBG0, "vrrpd_reselect_primary(%s): primary IP %s "
1914 "is selected", intf->vvi_ifname, abuf);
1919 * Select the primary IP address. Since the link-local IP address is always
1920 * at the head of the IP address list, try to find the first UP IP address
1921 * and see whether it qualify.
1923 static vrrp_ip_t *
1924 vrrpd_select_primary(vrrp_intf_t *pif)
1926 vrrp_ip_t *pip;
1927 char abuf[INET6_ADDRSTRLEN];
1929 vrrp_log(VRRP_DBG1, "vrrpd_select_primary(%s)", pif->vvi_ifname);
1931 TAILQ_FOREACH(pip, &pif->vvi_iplist, vip_next) {
1932 assert(pip->vip_state != NODE_STATE_STALE);
1934 /* LINTED E_CONSTANT_CONDITION */
1935 VRRPADDR2STR(pif->vvi_af, &pip->vip_addr, abuf,
1936 INET6_ADDRSTRLEN, _B_FALSE);
1937 vrrp_log(VRRP_DBG0, "vrrpd_select_primary(%s): %s is %s",
1938 pif->vvi_ifname, abuf,
1939 (pip->vip_flags & IFF_UP) ? "up" : "down");
1941 if (pip->vip_flags & IFF_UP)
1942 break;
1946 * Is this valid primary IP address?
1948 if (pip == NULL || !QUALIFY_PRIMARY_ADDR(pif, pip)) {
1949 vrrp_log(VRRP_DBG0, "vrrpd_select_primary(%s/%s) failed",
1950 pif->vvi_ifname, af_str(pif->vvi_af));
1951 return (NULL);
1953 return (pip);
1957 * This is a new interface. Check whether any VRRP router is waiting for it
1959 static void
1960 vrrpd_reenable_all_vr()
1962 vrrp_vr_t *vr;
1964 vrrp_log(VRRP_DBG0, "vrrpd_reenable_all_vr()");
1966 TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
1967 if (vr->vvr_conf.vvc_enabled)
1968 (void) vrrpd_enable_vr(vr);
1973 * If primary_addr_gone is _B_TRUE, it means that we failed to select
1974 * the primary IP address on this (physical) interface; otherwise,
1975 * it means the interface is no longer available.
1977 static void
1978 vrrpd_remove_if(vrrp_intf_t *intf, boolean_t primary_addr_gone)
1980 vrrp_vr_t *vr;
1982 vrrp_log(VRRP_DBG0, "vrrpd_remove_if(%s): %s", intf->vvi_ifname,
1983 primary_addr_gone ? "primary address gone" : "interface deleted");
1985 TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
1986 if (vr->vvr_conf.vvc_enabled)
1987 vrrpd_disable_vr(vr, intf, primary_addr_gone);
1992 * Update the VRRP configuration file based on the given configuration.
1993 * op is either VRRP_CONF_UPDATE or VRRP_CONF_DELETE
1995 static vrrp_err_t
1996 vrrpd_updateconf(vrrp_vr_conf_t *newconf, uint_t op)
1998 vrrp_vr_conf_t conf;
1999 FILE *fp, *nfp;
2000 int nfd;
2001 char line[LINE_MAX];
2002 char newfile[MAXPATHLEN];
2003 boolean_t found = _B_FALSE;
2004 vrrp_err_t err = VRRP_SUCCESS;
2006 vrrp_log(VRRP_DBG0, "vrrpd_updateconf(%s, %s)", newconf->vvc_name,
2007 op == VRRP_CONF_UPDATE ? "update" : "delete");
2009 if ((fp = fopen(vrrpd_conffile, "r+F")) == NULL) {
2010 if (errno != ENOENT) {
2011 vrrp_log(VRRP_ERR, "vrrpd_updateconf(): open %s for "
2012 "update failed: %s", vrrpd_conffile,
2013 strerror(errno));
2014 return (VRRP_EDB);
2017 if ((fp = fopen(vrrpd_conffile, "w+F")) == NULL) {
2018 vrrp_log(VRRP_ERR, "vrrpd_updateconf(): open %s for "
2019 "write failed: %s", vrrpd_conffile,
2020 strerror(errno));
2021 return (VRRP_EDB);
2025 (void) snprintf(newfile, MAXPATHLEN, "%s.new", vrrpd_conffile);
2026 if ((nfd = open(newfile, O_WRONLY | O_CREAT | O_TRUNC,
2027 S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) < 0) {
2028 vrrp_log(VRRP_ERR, "vrrpd_updateconf(): open %s failed: %s",
2029 newfile, strerror(errno));
2030 (void) fclose(fp);
2031 return (VRRP_EDB);
2034 if ((nfp = fdopen(nfd, "wF")) == NULL) {
2035 vrrp_log(VRRP_ERR, "vrrpd_updateconf(): fdopen(%s) failed: %s",
2036 newfile, strerror(errno));
2037 goto done;
2040 while (fgets(line, sizeof (line), fp) != NULL) {
2041 conf.vvc_vrid = VRRP_VRID_NONE;
2042 if (!found && (err = vrrpd_read_vrconf(line, &conf)) !=
2043 VRRP_SUCCESS) {
2044 vrrp_log(VRRP_ERR, "vrrpd_updateconf(): invalid "
2045 "configuration format: %s", line);
2046 goto done;
2050 * Write this line out if:
2051 * - this is a comment line; or
2052 * - we've done updating/deleting the the given VR; or
2053 * - if the name of the VR read from this line does not match
2054 * the VR name that we are about to update/delete;
2056 if (found || conf.vvc_vrid == VRRP_VRID_NONE ||
2057 strcmp(conf.vvc_name, newconf->vvc_name) != 0) {
2058 if (fputs(line, nfp) != EOF)
2059 continue;
2061 vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
2062 "write line %s", line);
2063 err = VRRP_EDB;
2064 goto done;
2068 * Otherwise, update/skip the line.
2070 found = _B_TRUE;
2071 if (op == VRRP_CONF_DELETE)
2072 continue;
2074 assert(op == VRRP_CONF_UPDATE);
2075 if ((err = vrrpd_write_vrconf(line, sizeof (line),
2076 newconf)) != VRRP_SUCCESS) {
2077 vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
2078 "update configuration for %s", newconf->vvc_name);
2079 goto done;
2081 if (fputs(line, nfp) == EOF) {
2082 vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
2083 "write line %s", line);
2084 err = VRRP_EDB;
2085 goto done;
2090 * If we get to the end of the file and have not seen the router that
2091 * we are about to update, write it out.
2093 if (!found && op == VRRP_CONF_UPDATE) {
2094 if ((err = vrrpd_write_vrconf(line, sizeof (line),
2095 newconf)) == VRRP_SUCCESS && fputs(line, nfp) == EOF) {
2096 vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
2097 "write line %s", line);
2098 err = VRRP_EDB;
2100 } else if (!found && op == VRRP_CONF_DELETE) {
2101 vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to find "
2102 "configuation for %s", newconf->vvc_name);
2103 err = VRRP_ENOTFOUND;
2106 if (err != VRRP_SUCCESS)
2107 goto done;
2109 if (fflush(nfp) == EOF || rename(newfile, vrrpd_conffile) < 0) {
2110 vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
2111 "rename file %s", newfile);
2112 err = VRRP_EDB;
2115 done:
2116 (void) fclose(fp);
2117 (void) fclose(nfp);
2118 (void) unlink(newfile);
2119 return (err);
2122 static vrrp_err_t
2123 vrrpd_write_vrconf(char *line, size_t len, vrrp_vr_conf_t *conf)
2125 vrrp_prop_t *prop;
2126 int n, i;
2128 vrrp_log(VRRP_DBG0, "vrrpd_write_vrconf(%s)", conf->vvc_name);
2130 for (i = 0; i < VRRP_PROP_INFO_TABSIZE; i++) {
2131 prop = &vrrp_prop_info_tbl[i];
2132 n = snprintf(line, len, i == 0 ? "%s=" : " %s=",
2133 prop->vs_propname);
2134 if (n < 0 || n >= len)
2135 break;
2136 len -= n;
2137 line += n;
2138 n = prop->vs_propwrite(conf, line, len);
2139 if (n < 0 || n >= len)
2140 break;
2141 len -= n;
2142 line += n;
2144 if (i != VRRP_PROP_INFO_TABSIZE) {
2145 vrrp_log(VRRP_ERR, "vrrpd_write_vrconf(%s): buffer size too"
2146 "small", conf->vvc_name);
2147 return (VRRP_EDB);
2149 n = snprintf(line, len, "\n");
2150 if (n < 0 || n >= len) {
2151 vrrp_log(VRRP_ERR, "vrrpd_write_vrconf(%s): buffer size too"
2152 "small", conf->vvc_name);
2153 return (VRRP_EDB);
2155 return (VRRP_SUCCESS);
2158 static vrrp_err_t
2159 vrrpd_read_vrconf(char *line, vrrp_vr_conf_t *conf)
2161 char *str, *token;
2162 char *next;
2163 vrrp_err_t err = VRRP_SUCCESS;
2164 char tmpbuf[MAXLINELEN];
2166 str = tmpbuf;
2167 (void) strlcpy(tmpbuf, line, MAXLINELEN);
2170 * Skip leading spaces, blank lines, and comments.
2172 skip_whitespace(str);
2173 if ((str - tmpbuf == strlen(tmpbuf)) || (*str == '#')) {
2174 conf->vvc_vrid = VRRP_VRID_NONE;
2175 return (VRRP_SUCCESS);
2179 * Read each VR properties.
2181 for (token = strtok_r(str, " \n\t", &next); token != NULL;
2182 token = strtok_r(NULL, " \n\t", &next)) {
2183 if ((err = vrrpd_readprop(token, conf)) != VRRP_SUCCESS)
2184 break;
2187 /* All properties read but no VRID defined */
2188 if (err == VRRP_SUCCESS && conf->vvc_vrid == VRRP_VRID_NONE)
2189 err = VRRP_EINVAL;
2191 return (err);
2194 static vrrp_err_t
2195 vrrpd_readprop(const char *str, vrrp_vr_conf_t *conf)
2197 vrrp_prop_t *prop;
2198 char *pstr;
2199 int i;
2201 if ((pstr = strchr(str, '=')) == NULL) {
2202 vrrp_log(VRRP_ERR, "vrrpd_readprop(%s): invalid property", str);
2203 return (VRRP_EINVAL);
2206 *pstr++ = '\0';
2207 for (i = 0; i < VRRP_PROP_INFO_TABSIZE; i++) {
2208 prop = &vrrp_prop_info_tbl[i];
2209 if (strcasecmp(str, prop->vs_propname) == 0) {
2210 if (prop->vs_propread(conf, pstr))
2211 break;
2215 if (i == VRRP_PROP_INFO_TABSIZE) {
2216 vrrp_log(VRRP_ERR, "vrrpd_readprop(%s): invalid property", str);
2217 return (VRRP_EINVAL);
2220 return (VRRP_SUCCESS);
2223 static boolean_t
2224 vrrp_rd_prop_name(vrrp_vr_conf_t *conf, const char *str)
2226 size_t size = sizeof (conf->vvc_name);
2227 return (strlcpy(conf->vvc_name, str, size) < size);
2230 static boolean_t
2231 vrrp_rd_prop_vrid(vrrp_vr_conf_t *conf, const char *str)
2233 conf->vvc_vrid = strtol(str, NULL, 0);
2234 return (!(conf->vvc_vrid < VRRP_VRID_MIN ||
2235 conf->vvc_vrid > VRRP_VRID_MAX ||
2236 (conf->vvc_vrid == 0 && errno != 0)));
2239 static boolean_t
2240 vrrp_rd_prop_af(vrrp_vr_conf_t *conf, const char *str)
2242 if (strcasecmp(str, "AF_INET") == 0)
2243 conf->vvc_af = AF_INET;
2244 else if (strcasecmp(str, "AF_INET6") == 0)
2245 conf->vvc_af = AF_INET6;
2246 else
2247 return (_B_FALSE);
2248 return (_B_TRUE);
2251 static boolean_t
2252 vrrp_rd_prop_pri(vrrp_vr_conf_t *conf, const char *str)
2254 conf->vvc_pri = strtol(str, NULL, 0);
2255 return (!(conf->vvc_pri < VRRP_PRI_MIN ||
2256 conf->vvc_pri > VRRP_PRI_OWNER ||
2257 (conf->vvc_pri == 0 && errno != 0)));
2260 static boolean_t
2261 vrrp_rd_prop_adver_int(vrrp_vr_conf_t *conf, const char *str)
2263 conf->vvc_adver_int = strtol(str, NULL, 0);
2264 return (!(conf->vvc_adver_int < VRRP_MAX_ADVER_INT_MIN ||
2265 conf->vvc_adver_int > VRRP_MAX_ADVER_INT_MAX ||
2266 (conf->vvc_adver_int == 0 && errno != 0)));
2269 static boolean_t
2270 vrrp_rd_prop_preempt(vrrp_vr_conf_t *conf, const char *str)
2272 if (strcasecmp(str, "true") == 0)
2273 conf->vvc_preempt = _B_TRUE;
2274 else if (strcasecmp(str, "false") == 0)
2275 conf->vvc_preempt = _B_FALSE;
2276 else
2277 return (_B_FALSE);
2278 return (_B_TRUE);
2281 static boolean_t
2282 vrrp_rd_prop_accept(vrrp_vr_conf_t *conf, const char *str)
2284 if (strcasecmp(str, "true") == 0)
2285 conf->vvc_accept = _B_TRUE;
2286 else if (strcasecmp(str, "false") == 0)
2287 conf->vvc_accept = _B_FALSE;
2288 else
2289 return (_B_FALSE);
2290 return (_B_TRUE);
2293 static boolean_t
2294 vrrp_rd_prop_enabled(vrrp_vr_conf_t *conf, const char *str)
2296 if (strcasecmp(str, "enabled") == 0)
2297 conf->vvc_enabled = _B_TRUE;
2298 else if (strcasecmp(str, "disabled") == 0)
2299 conf->vvc_enabled = _B_FALSE;
2300 else
2301 return (_B_FALSE);
2302 return (_B_TRUE);
2305 static boolean_t
2306 vrrp_rd_prop_ifname(vrrp_vr_conf_t *conf, const char *str)
2308 size_t size = sizeof (conf->vvc_link);
2309 return (strlcpy(conf->vvc_link, str, size) < size);
2312 static int
2313 vrrp_wt_prop_name(vrrp_vr_conf_t *conf, char *str, size_t size)
2315 return (snprintf(str, size, "%s", conf->vvc_name));
2318 static int
2319 vrrp_wt_prop_pri(vrrp_vr_conf_t *conf, char *str, size_t size)
2321 return (snprintf(str, size, "%d", conf->vvc_pri));
2324 static int
2325 vrrp_wt_prop_adver_int(vrrp_vr_conf_t *conf, char *str, size_t size)
2327 return (snprintf(str, size, "%d", conf->vvc_adver_int));
2330 static int
2331 vrrp_wt_prop_preempt(vrrp_vr_conf_t *conf, char *str, size_t size)
2333 return (snprintf(str, size, "%s",
2334 conf->vvc_preempt ? "true" : "false"));
2337 static int
2338 vrrp_wt_prop_accept(vrrp_vr_conf_t *conf, char *str, size_t size)
2340 return (snprintf(str, size, "%s",
2341 conf->vvc_accept ? "true" : "false"));
2344 static int
2345 vrrp_wt_prop_enabled(vrrp_vr_conf_t *conf, char *str, size_t size)
2347 return (snprintf(str, size, "%s",
2348 conf->vvc_enabled ? "enabled" : "disabled"));
2351 static int
2352 vrrp_wt_prop_vrid(vrrp_vr_conf_t *conf, char *str, size_t size)
2354 return (snprintf(str, size, "%d", conf->vvc_vrid));
2357 static int
2358 vrrp_wt_prop_af(vrrp_vr_conf_t *conf, char *str, size_t size)
2360 return (snprintf(str, size, "%s",
2361 conf->vvc_af == AF_INET ? "AF_INET" : "AF_INET6"));
2364 static int
2365 vrrp_wt_prop_ifname(vrrp_vr_conf_t *conf, char *str, size_t size)
2367 return (snprintf(str, size, "%s", conf->vvc_link));
/*
 * Map an address family to a printable name.  Both the AF_* constants and
 * the plain IP version numbers 4 and 6 are accepted; anything that is not
 * recognized yields "AF_error".  The returned string is a constant and
 * must not be modified or freed.
 */
static char *
af_str(int af)
{
	if (af == 4 || af == AF_INET)
		return ("AF_INET");

	if (af == 6 || af == AF_INET6)
		return ("AF_INET6");

	if (af == AF_UNSPEC)
		return ("AF_UNSPEC");

	return ("AF_error");
}
2383 static vrrp_err_t
2384 vrrpd_create_vr(vrrp_vr_conf_t *conf)
2386 vrrp_vr_t *vr;
2388 vrrp_log(VRRP_DBG0, "vrrpd_create_vr(%s)", conf->vvc_name);
2390 if ((vr = malloc(sizeof (vrrp_vr_t))) == NULL) {
2391 vrrp_log(VRRP_ERR, "vrrpd_create_vr(): memory allocation for %s"
2392 " failed", conf->vvc_name);
2393 return (VRRP_ENOMEM);
2396 bzero(vr, sizeof (vrrp_vr_t));
2397 vr->vvr_state = VRRP_STATE_NONE;
2398 vr->vvr_timer_id = -1;
2399 vrrpd_state_trans(VRRP_STATE_NONE, VRRP_STATE_INIT, vr);
2400 (void) memcpy(&vr->vvr_conf, conf, sizeof (vrrp_vr_conf_t));
2401 vr->vvr_conf.vvc_enabled = _B_FALSE;
2402 TAILQ_INSERT_HEAD(&vrrp_vr_list, vr, vvr_next);
2403 return (VRRP_SUCCESS);
2406 static void
2407 vrrpd_delete_vr(vrrp_vr_t *vr)
2409 vrrp_log(VRRP_DBG0, "vrrpd_delete_vr(%s)", vr->vvr_conf.vvc_name);
2410 if (vr->vvr_conf.vvc_enabled)
2411 vrrpd_disable_vr(vr, NULL, _B_FALSE);
2412 assert(vr->vvr_state == VRRP_STATE_INIT);
2413 vrrpd_state_trans(VRRP_STATE_INIT, VRRP_STATE_NONE, vr);
2414 TAILQ_REMOVE(&vrrp_vr_list, vr, vvr_next);
2415 (void) free(vr);
2418 static vrrp_err_t
2419 vrrpd_enable_vr(vrrp_vr_t *vr)
2421 vrrp_err_t rx_err, tx_err, err = VRRP_EINVAL;
2423 vrrp_log(VRRP_DBG0, "vrrpd_enable_vr(%s)", vr->vvr_conf.vvc_name);
2425 assert(vr->vvr_conf.vvc_enabled);
2428 * This VRRP router has been successfully enabled and start
2429 * participating.
2431 if (vr->vvr_state != VRRP_STATE_INIT)
2432 return (VRRP_SUCCESS);
2434 if ((rx_err = vrrpd_init_rxsock(vr)) == VRRP_SUCCESS) {
2436 * Select the primary IP address. Even if this time
2437 * primary IP selection failed, we will reselect the
2438 * primary IP address when new IP address comes up.
2440 vrrpd_reselect_primary(vr->vvr_pif);
2441 if (vr->vvr_pif->vvi_pip == NULL) {
2442 vrrp_log(VRRP_DBG0, "vrrpd_enable_vr(%s): "
2443 "select_primary over %s failed",
2444 vr->vvr_conf.vvc_name, vr->vvr_pif->vvi_ifname);
2445 rx_err = VRRP_ENOPRIM;
2450 * Initialize the TX socket used for this vrrp_vr_t to send the
2451 * multicast packets.
2453 tx_err = vrrpd_init_txsock(vr);
2456 * Only start the state transition if sockets for both RX and TX are
2457 * initialized correctly.
2459 if (rx_err != VRRP_SUCCESS || tx_err != VRRP_SUCCESS) {
2461 * Record the error information for diagnose purpose.
2463 vr->vvr_err = (rx_err == VRRP_SUCCESS) ? tx_err : rx_err;
2464 return (err);
2467 if (vr->vvr_conf.vvc_pri == 255)
2468 err = vrrpd_state_i2m(vr);
2469 else
2470 err = vrrpd_state_i2b(vr);
2472 if (err != VRRP_SUCCESS) {
2473 vr->vvr_err = err;
2474 vr->vvr_pif->vvi_pip = NULL;
2475 vrrpd_fini_txsock(vr);
2476 vrrpd_fini_rxsock(vr);
2478 return (err);
2482 * Given the removed interface, see whether the given VRRP router would
2483 * be affected and stop participating the VRRP protocol.
2485 * If intf is NULL, VR disabling request is coming from the admin.
2487 static void
2488 vrrpd_disable_vr(vrrp_vr_t *vr, vrrp_intf_t *intf, boolean_t primary_addr_gone)
2490 vrrp_log(VRRP_DBG0, "vrrpd_disable_vr(%s): %s%s", vr->vvr_conf.vvc_name,
2491 intf == NULL ? "requested by admin" : intf->vvi_ifname,
2492 intf == NULL ? "" : (primary_addr_gone ? "primary address gone" :
2493 "interface deleted"));
2496 * An interface is deleted, see whether this interface is the
2497 * physical interface or the VNIC of the given VRRP router.
2498 * If so, continue to disable the VRRP router.
2500 if (!primary_addr_gone && (intf != NULL) && (intf != vr->vvr_pif) &&
2501 (intf != vr->vvr_vif)) {
2502 return;
2506 * If this is the case that the primary IP address is gone,
2507 * and we failed to reselect another primary IP address,
2508 * continue to disable the VRRP router.
2510 if (primary_addr_gone && intf != vr->vvr_pif)
2511 return;
2513 vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): disabling",
2514 vr->vvr_conf.vvc_name);
2516 if (vr->vvr_state == VRRP_STATE_MASTER) {
2518 * If this router is disabled by the administrator, send
2519 * the zero-priority advertisement to indicate the Master
2520 * stops participating VRRP.
2522 if (intf == NULL)
2523 (void) vrrpd_send_adv(vr, _B_TRUE);
2525 vrrpd_state_m2i(vr);
2526 } else if (vr->vvr_state == VRRP_STATE_BACKUP) {
2527 vrrpd_state_b2i(vr);
2531 * If no primary IP address can be selected, the VRRP router
2532 * stays at the INIT state and will become BACKUP and MASTER when
2533 * a primary IP address is reselected.
2535 if (primary_addr_gone) {
2536 vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): primary IP "
2537 "is removed", vr->vvr_conf.vvc_name);
2538 vr->vvr_err = VRRP_ENOPRIM;
2539 } else if (intf == NULL) {
2541 * The VRRP router is disable by the administrator
2543 vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): disabled by admin",
2544 vr->vvr_conf.vvc_name);
2545 vr->vvr_err = VRRP_SUCCESS;
2546 vrrpd_fini_txsock(vr);
2547 vrrpd_fini_rxsock(vr);
2548 } else if (intf == vr->vvr_pif) {
2549 vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): physical interface "
2550 "%s removed", vr->vvr_conf.vvc_name, intf->vvi_ifname);
2551 vr->vvr_err = VRRP_ENOPRIM;
2552 vrrpd_fini_rxsock(vr);
2553 } else if (intf == vr->vvr_vif) {
2554 vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): VNIC interface %s"
2555 " removed", vr->vvr_conf.vvc_name, intf->vvi_ifname);
2556 vr->vvr_err = VRRP_ENOVIRT;
2557 vrrpd_fini_txsock(vr);
2561 vrrp_err_t
2562 vrrpd_create(vrrp_vr_conf_t *conf, boolean_t updateconf)
2564 vrrp_err_t err = VRRP_SUCCESS;
2566 vrrp_log(VRRP_DBG0, "vrrpd_create(%s, %s, %d)", conf->vvc_name,
2567 conf->vvc_link, conf->vvc_vrid);
2569 assert(conf != NULL);
2572 * Sanity check
2574 if ((strlen(conf->vvc_name) == 0) ||
2575 (strlen(conf->vvc_link) == 0) ||
2576 (conf->vvc_vrid < VRRP_VRID_MIN ||
2577 conf->vvc_vrid > VRRP_VRID_MAX) ||
2578 (conf->vvc_pri < VRRP_PRI_MIN ||
2579 conf->vvc_pri > VRRP_PRI_OWNER) ||
2580 (conf->vvc_adver_int < VRRP_MAX_ADVER_INT_MIN ||
2581 conf->vvc_adver_int > VRRP_MAX_ADVER_INT_MAX) ||
2582 (conf->vvc_af != AF_INET && conf->vvc_af != AF_INET6) ||
2583 (conf->vvc_pri == VRRP_PRI_OWNER && !conf->vvc_accept)) {
2584 vrrp_log(VRRP_DBG1, "vrrpd_create(%s): invalid argument",
2585 conf->vvc_name);
2586 return (VRRP_EINVAL);
2589 if (!vrrp_valid_name(conf->vvc_name)) {
2590 vrrp_log(VRRP_DBG1, "vrrpd_create(): %s is not a valid router "
2591 "name", conf->vvc_name);
2592 return (VRRP_EINVALVRNAME);
2595 if (vrrpd_lookup_vr_by_name(conf->vvc_name) != NULL) {
2596 vrrp_log(VRRP_DBG1, "vrrpd_create(): %s already exists",
2597 conf->vvc_name);
2598 return (VRRP_EINSTEXIST);
2601 if (vrrpd_lookup_vr_by_vrid(conf->vvc_link, conf->vvc_vrid,
2602 conf->vvc_af) != NULL) {
2603 vrrp_log(VRRP_DBG1, "vrrpd_create(): VRID %d/%s over %s "
2604 "already exists", conf->vvc_vrid, af_str(conf->vvc_af),
2605 conf->vvc_link);
2606 return (VRRP_EVREXIST);
2609 if (updateconf && (err = vrrpd_updateconf(conf,
2610 VRRP_CONF_UPDATE)) != VRRP_SUCCESS) {
2611 vrrp_log(VRRP_ERR, "vrrpd_create(): failed to update "
2612 "configuration for %s", conf->vvc_name);
2613 return (err);
2616 err = vrrpd_create_vr(conf);
2617 if (err != VRRP_SUCCESS && updateconf)
2618 (void) vrrpd_updateconf(conf, VRRP_CONF_DELETE);
2620 return (err);
/*
 * Delete the VRRP router named vn: remove it from the persistent
 * configuration first and, only if that worked, destroy the router itself.
 *
 * Returns VRRP_ENOTFOUND if no such router exists, the error of
 * vrrpd_updateconf() if the configuration could not be updated, and
 * VRRP_SUCCESS otherwise.
 */
static vrrp_err_t
vrrpd_delete(const char *vn)
{
	vrrp_vr_t *vr;
	vrrp_err_t err;

	vrrp_log(VRRP_DBG0, "vrrpd_delete(%s)", vn);

	vr = vrrpd_lookup_vr_by_name(vn);
	if (vr == NULL) {
		vrrp_log(VRRP_DBG1, "vrrpd_delete(): %s not exists", vn);
		return (VRRP_ENOTFOUND);
	}

	err = vrrpd_updateconf(&vr->vvr_conf, VRRP_CONF_DELETE);
	if (err == VRRP_SUCCESS) {
		vrrpd_delete_vr(vr);
		return (VRRP_SUCCESS);
	}

	vrrp_log(VRRP_ERR, "vrrpd_delete(): failed to delete "
	    "configuration for %s", vr->vvr_conf.vvc_name);
	return (err);
}
2647 static vrrp_err_t
2648 vrrpd_enable(const char *vn, boolean_t updateconf)
2650 vrrp_vr_t *vr;
2651 vrrp_vr_conf_t *conf;
2652 uint32_t flags;
2653 datalink_class_t class;
2654 vrrp_err_t err = VRRP_SUCCESS;
2656 vrrp_log(VRRP_DBG0, "vrrpd_enable(%s)", vn);
2658 if ((vr = vrrpd_lookup_vr_by_name(vn)) == NULL) {
2659 vrrp_log(VRRP_DBG1, "vrrpd_enable(): %s does not exist", vn);
2660 return (VRRP_ENOTFOUND);
2664 * The VR is already enabled.
2666 conf = &vr->vvr_conf;
2667 if (conf->vvc_enabled) {
2668 vrrp_log(VRRP_DBG1, "vrrpd_enable(): %s is already "
2669 "enabled", vn);
2670 return (VRRP_EALREADY);
2674 * Check whether the link exists.
2676 if ((strlen(conf->vvc_link) == 0) || dladm_name2info(vrrpd_vh->vh_dh,
2677 conf->vvc_link, NULL, &flags, &class, NULL) != DLADM_STATUS_OK ||
2678 !(flags & DLADM_OPT_ACTIVE) || ((class != DATALINK_CLASS_PHYS) &&
2679 (class != DATALINK_CLASS_VLAN) && (class != DATALINK_CLASS_AGGR) &&
2680 (class != DATALINK_CLASS_VNIC))) {
2681 vrrp_log(VRRP_DBG1, "vrrpd_enable(%s): invalid link %s",
2682 vn, conf->vvc_link);
2683 return (VRRP_EINVALLINK);
2687 * Get the associated VNIC name by the given interface/vrid/
2688 * address famitly.
2690 err = vrrp_get_vnicname(vrrpd_vh, conf->vvc_vrid,
2691 conf->vvc_af, conf->vvc_link, NULL, NULL, vr->vvr_vnic,
2692 sizeof (vr->vvr_vnic));
2693 if (err != VRRP_SUCCESS) {
2694 vrrp_log(VRRP_DBG1, "vrrpd_enable(%s): no VNIC for VRID %d/%s "
2695 "over %s", vn, conf->vvc_vrid, af_str(conf->vvc_af),
2696 conf->vvc_link);
2697 err = VRRP_ENOVNIC;
2698 goto fail;
2702 * Find the right VNIC, primary interface and get the list of the
2703 * protected IP adressses and primary IP address. Note that if
2704 * either interface is NULL (no IP addresses configured over the
2705 * interface), we will still continue and mark this VRRP router
2706 * as "enabled".
2708 vr->vvr_conf.vvc_enabled = _B_TRUE;
2709 if (updateconf && (err = vrrpd_updateconf(&vr->vvr_conf,
2710 VRRP_CONF_UPDATE)) != VRRP_SUCCESS) {
2711 vrrp_log(VRRP_ERR, "vrrpd_enable(): failed to update "
2712 "configuration for %s", vr->vvr_conf.vvc_name);
2713 goto fail;
2717 * If vrrpd_setup_vr() fails, it is possible that there is no IP
2718 * addresses over ether the primary interface or the VNIC yet,
2719 * return success in this case, the VRRP router will stay in
2720 * the initialized state and start to work when the IP address is
2721 * configured.
2723 (void) vrrpd_enable_vr(vr);
2724 return (VRRP_SUCCESS);
2726 fail:
2727 vr->vvr_conf.vvc_enabled = _B_FALSE;
2728 vr->vvr_vnic[0] = '\0';
2729 return (err);
/*
 * Disable the VRRP router named vn on behalf of the administrator and
 * record the new state in the persistent configuration.
 *
 * Returns VRRP_ENOTFOUND if the router does not exist, VRRP_EALREADY if it
 * is not enabled, the error of vrrpd_updateconf() if the configuration
 * could not be written (the router then stays enabled), and VRRP_SUCCESS
 * otherwise.
 */
static vrrp_err_t
vrrpd_disable(const char *vn)
{
	vrrp_vr_t *vr;
	vrrp_err_t err;

	vrrp_log(VRRP_DBG0, "vrrpd_disable(%s)", vn);

	vr = vrrpd_lookup_vr_by_name(vn);
	if (vr == NULL) {
		vrrp_log(VRRP_DBG1, "vrrpd_disable(): %s does not exist", vn);
		return (VRRP_ENOTFOUND);
	}

	/*
	 * Nothing to do if the VR is already disabled.
	 */
	if (!vr->vvr_conf.vvc_enabled) {
		vrrp_log(VRRP_DBG1, "vrrpd_disable(): %s was not enabled", vn);
		return (VRRP_EALREADY);
	}

	/* flip the flag first so that the updated configuration carries it */
	vr->vvr_conf.vvc_enabled = _B_FALSE;
	err = vrrpd_updateconf(&vr->vvr_conf, VRRP_CONF_UPDATE);
	if (err == VRRP_SUCCESS) {
		vrrpd_disable_vr(vr, NULL, _B_FALSE);
		vr->vvr_vnic[0] = '\0';
		return (VRRP_SUCCESS);
	}

	/* the update failed: restore the flag and report the error */
	vr->vvr_conf.vvc_enabled = _B_TRUE;
	vrrp_log(VRRP_ERR, "vrrpd_disable(): failed to update "
	    "configuration for %s", vr->vvr_conf.vvc_name);
	return (err);
}
2767 static vrrp_err_t
2768 vrrpd_modify(vrrp_vr_conf_t *conf, uint32_t mask)
2770 vrrp_vr_t *vr;
2771 vrrp_vr_conf_t savconf;
2772 int pri;
2773 boolean_t accept, set_accept = _B_FALSE;
2774 vrrp_err_t err;
2776 vrrp_log(VRRP_DBG0, "vrrpd_modify(%s)", conf->vvc_name);
2778 if (mask == 0)
2779 return (VRRP_SUCCESS);
2781 if ((vr = vrrpd_lookup_vr_by_name(conf->vvc_name)) == NULL) {
2782 vrrp_log(VRRP_DBG1, "vrrpd_modify(): cannot find the given "
2783 "VR instance: %s", conf->vvc_name);
2784 return (VRRP_ENOTFOUND);
2787 if (mask & VRRP_CONF_INTERVAL) {
2788 if (conf->vvc_adver_int < VRRP_MAX_ADVER_INT_MIN ||
2789 conf->vvc_adver_int > VRRP_MAX_ADVER_INT_MAX) {
2790 vrrp_log(VRRP_DBG1, "vrrpd_modify(%s): invalid "
2791 "adver_interval %d", conf->vvc_name,
2792 conf->vvc_adver_int);
2793 return (VRRP_EINVAL);
2797 pri = vr->vvr_conf.vvc_pri;
2798 if (mask & VRRP_CONF_PRIORITY) {
2799 if (conf->vvc_pri < VRRP_PRI_MIN ||
2800 conf->vvc_pri > VRRP_PRI_OWNER) {
2801 vrrp_log(VRRP_DBG1, "vrrpd_modify(%s): invalid "
2802 "priority %d", conf->vvc_name, conf->vvc_pri);
2803 return (VRRP_EINVAL);
2805 pri = conf->vvc_pri;
2808 accept = vr->vvr_conf.vvc_accept;
2809 if (mask & VRRP_CONF_ACCEPT)
2810 accept = conf->vvc_accept;
2812 if (pri == VRRP_PRI_OWNER && !accept) {
2813 vrrp_log(VRRP_DBG1, "vrrpd_modify(%s): accept mode must be "
2814 "true for VRRP address owner", conf->vvc_name);
2815 return (VRRP_EINVAL);
2818 if ((mask & VRRP_CONF_ACCEPT) && (vr->vvr_conf.vvc_accept != accept)) {
2819 err = vrrpd_set_noaccept(vr, !accept);
2820 if (err != VRRP_SUCCESS) {
2821 vrrp_log(VRRP_ERR, "vrrpd_modify(%s): access mode "
2822 "updating failed: %s", conf->vvc_name,
2823 vrrp_err2str(err));
2824 return (err);
2826 set_accept = _B_TRUE;
2830 * Save the current configuration, so it can be restored if the
2831 * following fails.
2833 (void) memcpy(&savconf, &vr->vvr_conf, sizeof (vrrp_vr_conf_t));
2834 if (mask & VRRP_CONF_PREEMPT)
2835 vr->vvr_conf.vvc_preempt = conf->vvc_preempt;
2837 if (mask & VRRP_CONF_ACCEPT)
2838 vr->vvr_conf.vvc_accept = accept;
2840 if (mask & VRRP_CONF_PRIORITY)
2841 vr->vvr_conf.vvc_pri = pri;
2843 if (mask & VRRP_CONF_INTERVAL)
2844 vr->vvr_conf.vvc_adver_int = conf->vvc_adver_int;
2846 err = vrrpd_updateconf(&vr->vvr_conf, VRRP_CONF_UPDATE);
2847 if (err != VRRP_SUCCESS) {
2848 vrrp_log(VRRP_ERR, "vrrpd_modify(%s): configuration update "
2849 "failed: %s", conf->vvc_name, vrrp_err2str(err));
2850 if (set_accept)
2851 (void) vrrpd_set_noaccept(vr, accept);
2852 (void) memcpy(&vr->vvr_conf, &savconf, sizeof (vrrp_vr_conf_t));
2853 return (err);
2856 if ((mask & VRRP_CONF_PRIORITY) && (vr->vvr_state == VRRP_STATE_BACKUP))
2857 vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr);
2859 if ((mask & VRRP_CONF_INTERVAL) && (vr->vvr_state == VRRP_STATE_MASTER))
2860 vr->vvr_timeout = conf->vvc_adver_int;
2862 return (VRRP_SUCCESS);
2865 static void
2866 vrrpd_list(vrid_t vrid, char *ifname, int af, vrrp_ret_list_t *ret,
2867 size_t *sizep)
2869 vrrp_vr_t *vr;
2870 char *p = (char *)ret + sizeof (vrrp_ret_list_t);
2871 size_t size = (*sizep) - sizeof (vrrp_ret_list_t);
2873 vrrp_log(VRRP_DBG0, "vrrpd_list(%d_%s_%s)", vrid, ifname, af_str(af));
2875 ret->vrl_cnt = 0;
2876 TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
2877 if (vrid != VRRP_VRID_NONE && vr->vvr_conf.vvc_vrid != vrid)
2878 continue;
2880 if (strlen(ifname) != 0 && strcmp(ifname,
2881 vr->vvr_conf.vvc_link) == 0) {
2882 continue;
2885 if ((af == AF_INET || af == AF_INET6) &&
2886 vr->vvr_conf.vvc_af != af)
2887 continue;
2889 if (size < VRRP_NAME_MAX) {
2890 vrrp_log(VRRP_DBG1, "vrrpd_list(): buffer size too "
2891 "small to hold %d router names", ret->vrl_cnt);
2892 *sizep = sizeof (vrrp_ret_list_t);
2893 ret->vrl_err = VRRP_ETOOSMALL;
2894 return;
2896 (void) strlcpy(p, vr->vvr_conf.vvc_name, VRRP_NAME_MAX);
2897 p += (strlen(vr->vvr_conf.vvc_name) + 1);
2898 ret->vrl_cnt++;
2899 size -= VRRP_NAME_MAX;
2902 *sizep = sizeof (vrrp_ret_list_t) + ret->vrl_cnt * VRRP_NAME_MAX;
2903 vrrp_log(VRRP_DBG1, "vrrpd_list() return %d", ret->vrl_cnt);
2904 ret->vrl_err = VRRP_SUCCESS;
2907 static void
2908 vrrpd_query(const char *vn, vrrp_ret_query_t *ret, size_t *sizep)
2910 vrrp_queryinfo_t *infop;
2911 vrrp_vr_t *vr;
2912 vrrp_intf_t *vif;
2913 vrrp_ip_t *ip;
2914 struct timeval now;
2915 uint32_t vipcnt = 0;
2916 size_t size = *sizep;
2918 vrrp_log(VRRP_DBG1, "vrrpd_query(%s)", vn);
2920 if ((vr = vrrpd_lookup_vr_by_name(vn)) == NULL) {
2921 vrrp_log(VRRP_DBG1, "vrrpd_query(): %s does not exist", vn);
2922 *sizep = sizeof (vrrp_ret_query_t);
2923 ret->vrq_err = VRRP_ENOTFOUND;
2924 return;
2928 * Get the virtual IP list if the router is not in the INIT state.
2930 if (vr->vvr_state != VRRP_STATE_INIT) {
2931 vif = vr->vvr_vif;
2932 TAILQ_FOREACH(ip, &vif->vvi_iplist, vip_next) {
2933 vipcnt++;
2937 *sizep = sizeof (vrrp_ret_query_t);
2938 *sizep += (vipcnt == 0) ? 0 : (vipcnt - 1) * sizeof (vrrp_addr_t);
2939 if (*sizep > size) {
2940 vrrp_log(VRRP_ERR, "vrrpd_query(): not enough space to hold "
2941 "%d virtual IPs", vipcnt);
2942 *sizep = sizeof (vrrp_ret_query_t);
2943 ret->vrq_err = VRRP_ETOOSMALL;
2944 return;
2947 (void) gettimeofday(&now, NULL);
2949 bzero(ret, *sizep);
2950 infop = &ret->vrq_qinfo;
2951 (void) memcpy(&infop->show_vi,
2952 &(vr->vvr_conf), sizeof (vrrp_vr_conf_t));
2953 (void) memcpy(&infop->show_vs,
2954 &(vr->vvr_sinfo), sizeof (vrrp_stateinfo_t));
2955 (void) strlcpy(infop->show_va.va_vnic, vr->vvr_vnic, MAXLINKNAMELEN);
2956 infop->show_vt.vt_since_last_tran = timeval_to_milli(
2957 timeval_delta(now, vr->vvr_sinfo.vs_st_time));
2959 if (vr->vvr_state == VRRP_STATE_INIT) {
2960 ret->vrq_err = VRRP_SUCCESS;
2961 return;
2964 vipcnt = 0;
2965 TAILQ_FOREACH(ip, &vif->vvi_iplist, vip_next) {
2966 (void) memcpy(&infop->show_va.va_vips[vipcnt++],
2967 &ip->vip_addr, sizeof (vrrp_addr_t));
2969 infop->show_va.va_vipcnt = vipcnt;
2971 (void) memcpy(&infop->show_va.va_primary,
2972 &vr->vvr_pif->vvi_pip->vip_addr, sizeof (vrrp_addr_t));
2974 (void) memcpy(&infop->show_vp, &(vr->vvr_peer), sizeof (vrrp_peer_t));
2977 * Check whether there is a peer.
2979 if (!VRRPADDR_UNSPECIFIED(vr->vvr_conf.vvc_af,
2980 &(vr->vvr_peer.vp_addr))) {
2981 infop->show_vt.vt_since_last_adv = timeval_to_milli(
2982 timeval_delta(now, vr->vvr_peer.vp_time));
2985 if (vr->vvr_state == VRRP_STATE_BACKUP) {
2986 infop->show_vt.vt_master_down_intv =
2987 MASTER_DOWN_INTERVAL_VR(vr);
2990 ret->vrq_err = VRRP_SUCCESS;
2994 * Build the VRRP packet (not including the IP header). Return the
2995 * payload length.
2997 * If zero_pri is set to be B_TRUE, then this is the specical zero-priority
2998 * advertisement which is sent by the Master to indicate that it has been
2999 * stopped participating in VRRP.
3001 static size_t
3002 vrrpd_build_vrrp(vrrp_vr_t *vr, uchar_t *buf, int buflen, boolean_t zero_pri)
3004 /* LINTED E_BAD_PTR_CAST_ALIGN */
3005 vrrp_pkt_t *vp = (vrrp_pkt_t *)buf;
3006 /* LINTED E_BAD_PTR_CAST_ALIGN */
3007 struct in_addr *a4 = (struct in_addr *)(vp + 1);
3008 /* LINTED E_BAD_PTR_CAST_ALIGN */
3009 struct in6_addr *a6 = (struct in6_addr *)(vp + 1);
3010 vrrp_intf_t *vif = vr->vvr_vif;
3011 vrrp_ip_t *vip;
3012 int af = vif->vvi_af;
3013 size_t size = sizeof (vrrp_pkt_t);
3014 uint16_t rsvd_adver_int;
3015 int nip = 0;
3017 vrrp_log(VRRP_DBG1, "vrrpd_build_vrrp(%s, %s_priority): intv %d",
3018 vr->vvr_conf.vvc_name, zero_pri ? "zero" : "non-zero",
3019 vr->vvr_conf.vvc_adver_int);
3021 TAILQ_FOREACH(vip, &vif->vvi_iplist, vip_next) {
3022 if ((size += ((af == AF_INET) ? sizeof (struct in_addr) :
3023 sizeof (struct in6_addr))) > buflen) {
3024 vrrp_log(VRRP_ERR, "vrrpd_build_vrrp(%s): buffer size "
3025 "not big enough %d", vr->vvr_conf.vvc_name, size);
3026 return (0);
3029 if (af == AF_INET)
3030 a4[nip++] = vip->vip_addr.in4.sin_addr;
3031 else
3032 a6[nip++] = vip->vip_addr.in6.sin6_addr;
3035 if (nip == 0) {
3036 vrrp_log(VRRP_ERR, "vrrpd_build_vrrp(%s): no virtual IP "
3037 "address", vr->vvr_conf.vvc_name);
3038 return (0);
3041 vp->vp_vers_type = (VRRP_VERSION << 4) | VRRP_PKT_ADVERT;
3042 vp->vp_vrid = vr->vvr_conf.vvc_vrid;
3043 vp->vp_prio = zero_pri ? VRRP_PRIO_ZERO : vr->vvr_conf.vvc_pri;
3045 rsvd_adver_int = MSEC2CENTISEC(vr->vvr_conf.vvc_adver_int) & 0x0fff;
3046 vp->vp_rsvd_adver_int = htons(rsvd_adver_int);
3047 vp->vp_ipnum = nip;
3050 * Set the checksum to 0 first, then caculate it.
3052 vp->vp_chksum = 0;
3053 if (af == AF_INET) {
3054 vp->vp_chksum = vrrp_cksum4(
3055 &vr->vvr_pif->vvi_pip->vip_addr.in4.sin_addr,
3056 &vrrp_muladdr4.in4.sin_addr, size, vp);
3057 } else {
3058 vp->vp_chksum = vrrp_cksum6(
3059 &vr->vvr_pif->vvi_pip->vip_addr.in6.sin6_addr,
3060 &vrrp_muladdr6.in6.sin6_addr, size, vp);
3063 return (size);
3067 * We need to build the IPv4 header on our own.
3069 static vrrp_err_t
3070 vrrpd_send_adv_v4(vrrp_vr_t *vr, uchar_t *buf, size_t len, boolean_t zero_pri)
3072 /* LINTED E_BAD_PTR_CAST_ALIGN */
3073 struct ip *ip = (struct ip *)buf;
3074 size_t plen;
3076 vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v4(%s)", vr->vvr_conf.vvc_name);
3078 if ((plen = vrrpd_build_vrrp(vr, buf + sizeof (struct ip),
3079 len - sizeof (struct ip), zero_pri)) == 0) {
3080 return (VRRP_ETOOSMALL);
3083 ip->ip_hl = sizeof (struct ip) >> 2;
3084 ip->ip_v = IPV4_VERSION;
3085 ip->ip_tos = 0;
3086 plen += sizeof (struct ip);
3087 ip->ip_len = htons(plen);
3088 ip->ip_off = 0;
3089 ip->ip_ttl = VRRP_IP_TTL;
3090 ip->ip_p = IPPROTO_VRRP;
3091 ip->ip_src = vr->vvr_pif->vvi_pip->vip_addr.in4.sin_addr;
3092 ip->ip_dst = vrrp_muladdr4.in4.sin_addr;
3095 * The kernel will set the IP cksum and the IPv4 identification.
3097 ip->ip_id = 0;
3098 ip->ip_sum = 0;
3100 if ((len = sendto(vr->vvr_vif->vvi_sockfd, buf, plen, 0,
3101 (const struct sockaddr *)&vrrp_muladdr4,
3102 sizeof (struct sockaddr_in))) != plen) {
3103 vrrp_log(VRRP_ERR, "vrrpd_send_adv_v4(): sendto() on "
3104 "(vrid:%d, %s, %s) failed: %s sent:%d expect:%d",
3105 vr->vvr_conf.vvc_vrid, vr->vvr_vif->vvi_ifname,
3106 af_str(vr->vvr_conf.vvc_af), strerror(errno), len, plen);
3107 return (VRRP_ESYS);
3110 vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v4(%s) succeed",
3111 vr->vvr_conf.vvc_name);
3112 return (VRRP_SUCCESS);
3115 static vrrp_err_t
3116 vrrpd_send_adv_v6(vrrp_vr_t *vr, uchar_t *buf, size_t len, boolean_t zero_pri)
3118 struct msghdr msg6;
3119 size_t hoplimit_space = 0;
3120 size_t pktinfo_space = 0;
3121 size_t bufspace = 0;
3122 struct in6_pktinfo *pktinfop;
3123 struct cmsghdr *cmsgp;
3124 uchar_t *cmsg_datap;
3125 struct iovec iov;
3126 size_t plen;
3128 vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v6(%s)", vr->vvr_conf.vvc_name);
3130 if ((plen = vrrpd_build_vrrp(vr, buf, len, zero_pri)) == 0)
3131 return (VRRP_ETOOSMALL);
3133 msg6.msg_control = NULL;
3134 msg6.msg_controllen = 0;
3136 hoplimit_space = sizeof (int);
3137 bufspace += sizeof (struct cmsghdr) + _MAX_ALIGNMENT +
3138 hoplimit_space + _MAX_ALIGNMENT;
3140 pktinfo_space = sizeof (struct in6_pktinfo);
3141 bufspace += sizeof (struct cmsghdr) + _MAX_ALIGNMENT +
3142 pktinfo_space + _MAX_ALIGNMENT;
3145 * We need to temporarily set the msg6.msg_controllen to bufspace
3146 * (we will later trim it to actual length used). This is needed because
3147 * CMSG_NXTHDR() uses it to check we have not exceeded the bounds.
3149 bufspace += sizeof (struct cmsghdr);
3150 msg6.msg_controllen = bufspace;
3152 msg6.msg_control = (struct cmsghdr *)malloc(bufspace);
3153 if (msg6.msg_control == NULL) {
3154 vrrp_log(VRRP_ERR, "vrrpd_send_adv_v6(%s): memory allocation "
3155 "failed: %s", vr->vvr_conf.vvc_name, strerror(errno));
3156 return (VRRP_ENOMEM);
3159 cmsgp = CMSG_FIRSTHDR(&msg6);
3161 cmsgp->cmsg_level = IPPROTO_IPV6;
3162 cmsgp->cmsg_type = IPV6_HOPLIMIT;
3163 cmsg_datap = CMSG_DATA(cmsgp);
3164 /* LINTED */
3165 *(int *)cmsg_datap = VRRP_IP_TTL;
3166 cmsgp->cmsg_len = cmsg_datap + hoplimit_space - (uchar_t *)cmsgp;
3167 cmsgp = CMSG_NXTHDR(&msg6, cmsgp);
3169 cmsgp->cmsg_level = IPPROTO_IPV6;
3170 cmsgp->cmsg_type = IPV6_PKTINFO;
3171 cmsg_datap = CMSG_DATA(cmsgp);
3173 /* LINTED */
3174 pktinfop = (struct in6_pktinfo *)cmsg_datap;
3176 * We don't know if pktinfop->ipi6_addr is aligned properly,
3177 * therefore let's use bcopy, instead of assignment.
3179 (void) bcopy(&vr->vvr_pif->vvi_pip->vip_addr.in6.sin6_addr,
3180 &pktinfop->ipi6_addr, sizeof (struct in6_addr));
3183 * We can assume pktinfop->ipi6_ifindex is 32 bit aligned.
3185 pktinfop->ipi6_ifindex = vr->vvr_vif->vvi_ifindex;
3186 cmsgp->cmsg_len = cmsg_datap + pktinfo_space - (uchar_t *)cmsgp;
3187 cmsgp = CMSG_NXTHDR(&msg6, cmsgp);
3188 msg6.msg_controllen = (char *)cmsgp - (char *)msg6.msg_control;
3190 msg6.msg_name = &vrrp_muladdr6;
3191 msg6.msg_namelen = sizeof (struct sockaddr_in6);
3193 iov.iov_base = buf;
3194 iov.iov_len = plen;
3195 msg6.msg_iov = &iov;
3196 msg6.msg_iovlen = 1;
3198 if ((len = sendmsg(vr->vvr_vif->vvi_sockfd,
3199 (const struct msghdr *)&msg6, 0)) != plen) {
3200 vrrp_log(VRRP_ERR, "vrrpd_send_adv_v6(%s): sendmsg() failed: "
3201 "%s expect %d sent %d", vr->vvr_conf.vvc_name,
3202 strerror(errno), plen, len);
3203 (void) free(msg6.msg_control);
3204 return (VRRP_ESYS);
3207 vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v6(%s) succeed",
3208 vr->vvr_conf.vvc_name);
3209 (void) free(msg6.msg_control);
3210 return (VRRP_SUCCESS);
3214 * Send the VRRP advertisement packets.
3216 static vrrp_err_t
3217 vrrpd_send_adv(vrrp_vr_t *vr, boolean_t zero_pri)
3219 uint64_t buf[(IP_MAXPACKET + 1)/8];
3221 vrrp_log(VRRP_DBG1, "vrrpd_send_adv(%s, %s_priority)",
3222 vr->vvr_conf.vvc_name, zero_pri ? "zero" : "non_zero");
3224 assert(vr->vvr_pif->vvi_pip != NULL);
3226 if (vr->vvr_pif->vvi_pip == NULL) {
3227 vrrp_log(VRRP_DBG0, "vrrpd_send_adv(%s): no primary IP "
3228 "address", vr->vvr_conf.vvc_name);
3229 return (VRRP_EINVAL);
3232 if (vr->vvr_conf.vvc_af == AF_INET) {
3233 return (vrrpd_send_adv_v4(vr, (uchar_t *)buf,
3234 sizeof (buf), zero_pri));
3235 } else {
3236 return (vrrpd_send_adv_v6(vr, (uchar_t *)buf,
3237 sizeof (buf), zero_pri));
3241 static void
3242 vrrpd_process_adv(vrrp_vr_t *vr, vrrp_addr_t *from, vrrp_pkt_t *vp)
3244 vrrp_vr_conf_t *conf = &vr->vvr_conf;
3245 char peer[INET6_ADDRSTRLEN];
3246 char local[INET6_ADDRSTRLEN];
3247 int addr_cmp;
3248 uint16_t peer_adver_int;
3250 /* LINTED E_CONSTANT_CONDITION */
3251 VRRPADDR2STR(vr->vvr_conf.vvc_af, from, peer, INET6_ADDRSTRLEN,
3252 _B_FALSE);
3253 vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s) from %s", conf->vvc_name,
3254 peer);
3256 if (vr->vvr_state <= VRRP_STATE_INIT) {
3257 vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): state: %s, not "
3258 "ready", conf->vvc_name, vrrp_state2str(vr->vvr_state));
3259 return;
3262 peer_adver_int = CENTISEC2MSEC(ntohs(vp->vp_rsvd_adver_int) & 0x0fff);
3264 /* LINTED E_CONSTANT_CONDITION */
3265 VRRPADDR2STR(vr->vvr_pif->vvi_af, &vr->vvr_pif->vvi_pip->vip_addr,
3266 local, INET6_ADDRSTRLEN, _B_FALSE);
3267 vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): local/state/pri"
3268 "(%s/%s/%d) peer/pri/intv(%s/%d/%d)", conf->vvc_name, local,
3269 vrrp_state2str(vr->vvr_state), conf->vvc_pri, peer,
3270 vp->vp_prio, peer_adver_int);
3272 addr_cmp = ipaddr_cmp(vr->vvr_pif->vvi_af, from,
3273 &vr->vvr_pif->vvi_pip->vip_addr);
3274 if (addr_cmp == 0) {
3275 vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): local message",
3276 conf->vvc_name);
3277 return;
3278 } else if (conf->vvc_pri == vp->vp_prio) {
3279 vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): peer IP %s is %s"
3280 " than the local IP %s", conf->vvc_name, peer,
3281 addr_cmp > 0 ? "greater" : "less", local);
3284 if (conf->vvc_pri == 255) {
3285 vrrp_log(VRRP_ERR, "vrrpd_process_adv(%s): virtual address "
3286 "owner received advertisement from %s", conf->vvc_name,
3287 peer);
3288 return;
3291 (void) gettimeofday(&vr->vvr_peer_time, NULL);
3292 (void) memcpy(&vr->vvr_peer_addr, from, sizeof (vrrp_addr_t));
3293 vr->vvr_peer_prio = vp->vp_prio;
3294 vr->vvr_peer_adver_int = peer_adver_int;
3296 if (vr->vvr_state == VRRP_STATE_BACKUP) {
3297 vr->vvr_master_adver_int = vr->vvr_peer_adver_int;
3298 if ((vp->vp_prio == VRRP_PRIO_ZERO) ||
3299 (conf->vvc_preempt == _B_FALSE ||
3300 vp->vp_prio >= conf->vvc_pri)) {
3301 (void) iu_cancel_timer(vrrpd_timerq,
3302 vr->vvr_timer_id, NULL);
3303 if (vp->vp_prio == VRRP_PRIO_ZERO) {
3304 /* the master stops participating in VRRP */
3305 vr->vvr_timeout = SKEW_TIME_VR(vr);
3306 } else {
3307 vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr);
3309 if ((vr->vvr_timer_id = iu_schedule_timer_ms(
3310 vrrpd_timerq, vr->vvr_timeout, vrrp_b2m_timeout,
3311 vr)) == -1) {
3312 vrrp_log(VRRP_ERR, "vrrpd_process_adv(%s): "
3313 "start vrrp_b2m_timeout(%d) failed",
3314 conf->vvc_name, vr->vvr_timeout);
3315 } else {
3316 vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): "
3317 "start vrrp_b2m_timeout(%d)",
3318 conf->vvc_name, vr->vvr_timeout);
3321 } else if (vr->vvr_state == VRRP_STATE_MASTER) {
3322 if (vp->vp_prio == VRRP_PRIO_ZERO) {
3323 (void) vrrpd_send_adv(vr, _B_FALSE);
3324 (void) iu_cancel_timer(vrrpd_timerq,
3325 vr->vvr_timer_id, NULL);
3326 if ((vr->vvr_timer_id = iu_schedule_timer_ms(
3327 vrrpd_timerq, vr->vvr_timeout, vrrp_adv_timeout,
3328 vr)) == -1) {
3329 vrrp_log(VRRP_ERR, "vrrpd_process_adv(%s): "
3330 "start vrrp_adv_timeout(%d) failed",
3331 conf->vvc_name, vr->vvr_timeout);
3332 } else {
3333 vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): "
3334 "start vrrp_adv_timeout(%d)",
3335 conf->vvc_name, vr->vvr_timeout);
3337 } else if (vp->vp_prio > conf->vvc_pri ||
3338 (vp->vp_prio == conf->vvc_pri && addr_cmp > 0)) {
3339 (void) vrrpd_state_m2b(vr);
3341 } else {
3342 assert(_B_FALSE);
3346 static vrrp_err_t
3347 vrrpd_process_vrrp(vrrp_intf_t *pif, vrrp_pkt_t *vp, size_t len,
3348 vrrp_addr_t *from)
3350 vrrp_vr_t *vr;
3351 uint8_t vers_type;
3352 uint16_t saved_cksum, cksum;
3353 char peer[INET6_ADDRSTRLEN];
3355 /* LINTED E_CONSTANT_CONDITION */
3356 VRRPADDR2STR(pif->vvi_af, from, peer, INET6_ADDRSTRLEN, _B_FALSE);
3357 vrrp_log(VRRP_DBG0, "vrrpd_process_vrrp(%s) from %s", pif->vvi_ifname,
3358 peer);
3360 if (len < sizeof (vrrp_pkt_t)) {
3361 vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s): invalid message "
3362 "length %d", len);
3363 return (VRRP_EINVAL);
3367 * Verify: VRRP version number and packet type.
3369 vers_type = ((vp->vp_vers_type & VRRP_VER_MASK) >> 4);
3370 if (vers_type != VRRP_VERSION) {
3371 vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s) unsupported "
3372 "version %d", pif->vvi_ifname, vers_type);
3373 return (VRRP_EINVAL);
3376 if (vp->vp_ipnum == 0) {
3377 vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s): zero IPvX count",
3378 pif->vvi_ifname);
3379 return (VRRP_EINVAL);
3382 if (len - sizeof (vrrp_pkt_t) !=
3383 vp->vp_ipnum * (pif->vvi_af == AF_INET ? sizeof (struct in_addr) :
3384 sizeof (struct in6_addr))) {
3385 vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s): invalid IPvX count"
3386 " %d", pif->vvi_ifname, vp->vp_ipnum);
3387 return (VRRP_EINVAL);
3390 vers_type = (vp->vp_vers_type & VRRP_TYPE_MASK);
3393 * verify: VRRP checksum. Note that vrrp_cksum returns network byte
3394 * order checksum value;
3396 saved_cksum = vp->vp_chksum;
3397 vp->vp_chksum = 0;
3398 if (pif->vvi_af == AF_INET) {
3399 cksum = vrrp_cksum4(&from->in4.sin_addr,
3400 &vrrp_muladdr4.in4.sin_addr, len, vp);
3401 } else {
3402 cksum = vrrp_cksum6(&from->in6.sin6_addr,
3403 &vrrp_muladdr6.in6.sin6_addr, len, vp);
3406 if (cksum != saved_cksum) {
3407 vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s) invalid "
3408 "checksum: expected/real(0x%x/0x%x)", pif->vvi_ifname,
3409 cksum, saved_cksum);
3410 return (VRRP_EINVAL);
3413 if ((vr = vrrpd_lookup_vr_by_vrid(pif->vvi_ifname, vp->vp_vrid,
3414 pif->vvi_af)) != NULL && vers_type == VRRP_PKT_ADVERT) {
3415 vrrpd_process_adv(vr, from, vp);
3416 } else {
3417 vrrp_log(VRRP_DBG1, "vrrpd_process_vrrp(%s) VRID(%d/%s) "
3418 "not configured", pif->vvi_ifname, vp->vp_vrid,
3419 af_str(pif->vvi_af));
3421 return (VRRP_SUCCESS);
3425 * IPv4 socket, the IPv4 header is included.
3427 static vrrp_err_t
3428 vrrpd_process_adv_v4(vrrp_intf_t *pif, struct msghdr *msgp, size_t len)
3430 char abuf[INET6_ADDRSTRLEN];
3431 struct ip *ip;
3433 vrrp_log(VRRP_DBG0, "vrrpd_process_adv_v4(%s, %d)",
3434 pif->vvi_ifname, len);
3436 ip = (struct ip *)msgp->msg_iov->iov_base;
3438 /* Sanity check */
3439 if (len < sizeof (struct ip) || len < ntohs(ip->ip_len)) {
3440 vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid length "
3441 "%d", pif->vvi_ifname, len);
3442 return (VRRP_EINVAL);
3445 assert(ip->ip_v == IPV4_VERSION);
3446 assert(ip->ip_p == IPPROTO_VRRP);
3447 assert(msgp->msg_namelen == sizeof (struct sockaddr_in));
3449 if (vrrp_muladdr4.in4.sin_addr.s_addr != ip->ip_dst.s_addr) {
3450 vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid "
3451 "destination %s", pif->vvi_ifname,
3452 inet_ntop(pif->vvi_af, &(ip->ip_dst), abuf, sizeof (abuf)));
3453 return (VRRP_EINVAL);
3456 if (ip->ip_ttl != VRRP_IP_TTL) {
3457 vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid "
3458 "ttl %d", pif->vvi_ifname, ip->ip_ttl);
3459 return (VRRP_EINVAL);
3463 * Note that the ip_len contains only the IP payload length.
3465 return (vrrpd_process_vrrp(pif,
3466 /* LINTED E_BAD_PTR_CAST_ALIGN */
3467 (vrrp_pkt_t *)((char *)ip + ip->ip_hl * 4), ntohs(ip->ip_len),
3468 (vrrp_addr_t *)msgp->msg_name));
3472 * IPv6 socket, check the ancillary_data.
3474 static vrrp_err_t
3475 vrrpd_process_adv_v6(vrrp_intf_t *pif, struct msghdr *msgp, size_t len)
3477 struct cmsghdr *cmsgp;
3478 uchar_t *cmsg_datap;
3479 struct in6_pktinfo *pktinfop;
3480 char abuf[INET6_ADDRSTRLEN];
3481 int ttl;
3483 vrrp_log(VRRP_DBG1, "vrrpd_process_adv_v6(%s, %d)",
3484 pif->vvi_ifname, len);
3486 /* Sanity check */
3487 if (len < sizeof (vrrp_pkt_t)) {
3488 vrrp_log(VRRP_ERR, "vrrpd_process_adv_v6(%s): invalid length "
3489 "%d", pif->vvi_ifname, len);
3490 return (VRRP_EINVAL);
3493 assert(msgp->msg_namelen == sizeof (struct sockaddr_in6));
3495 for (cmsgp = CMSG_FIRSTHDR(msgp); cmsgp != NULL;
3496 cmsgp = CMSG_NXTHDR(msgp, cmsgp)) {
3497 assert(cmsgp->cmsg_level == IPPROTO_IPV6);
3498 cmsg_datap = CMSG_DATA(cmsgp);
3500 switch (cmsgp->cmsg_type) {
3501 case IPV6_HOPLIMIT:
3502 /* LINTED E_BAD_PTR_CAST_ALIGN */
3503 if ((ttl = *(int *)cmsg_datap) == VRRP_IP_TTL)
3504 break;
3506 vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid "
3507 "ttl %d", pif->vvi_ifname, ttl);
3508 return (VRRP_EINVAL);
3509 case IPV6_PKTINFO:
3510 /* LINTED E_BAD_PTR_CAST_ALIGN */
3511 pktinfop = (struct in6_pktinfo *)cmsg_datap;
3512 if (IN6_ARE_ADDR_EQUAL(&pktinfop->ipi6_addr,
3513 &vrrp_muladdr6.in6.sin6_addr)) {
3514 break;
3517 vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid "
3518 "destination %s", pif->vvi_ifname,
3519 inet_ntop(pif->vvi_af, &pktinfop->ipi6_addr, abuf,
3520 sizeof (abuf)));
3521 return (VRRP_EINVAL);
3525 return (vrrpd_process_vrrp(pif, msgp->msg_iov->iov_base, len,
3526 msgp->msg_name));
3529 /* ARGSUSED */
3530 static void
3531 vrrpd_sock_handler(iu_eh_t *eh, int s, short events, iu_event_id_t id,
3532 void *arg)
3534 struct msghdr msg;
3535 vrrp_addr_t from;
3536 uint64_t buf[(IP_MAXPACKET + 1)/8];
3537 uint64_t ancillary_data[(IP_MAXPACKET + 1)/8];
3538 vrrp_intf_t *pif = arg;
3539 int af = pif->vvi_af;
3540 int len;
3541 struct iovec iov;
3543 vrrp_log(VRRP_DBG1, "vrrpd_sock_handler(%s)", pif->vvi_ifname);
3545 msg.msg_name = (struct sockaddr *)&from;
3546 msg.msg_namelen = (af == AF_INET) ? sizeof (struct sockaddr_in) :
3547 sizeof (struct sockaddr_in6);
3548 iov.iov_base = (char *)buf;
3549 iov.iov_len = sizeof (buf);
3550 msg.msg_iov = &iov;
3551 msg.msg_iovlen = 1;
3552 msg.msg_control = ancillary_data;
3553 msg.msg_controllen = sizeof (ancillary_data);
3555 if ((len = recvmsg(s, &msg, 0)) == -1) {
3556 vrrp_log(VRRP_ERR, "vrrpd_sock_handler() recvmsg(%s) "
3557 "failed: %s", pif->vvi_ifname, strerror(errno));
3558 return;
3562 * Ignore packets whose control buffers that don't fit
3564 if (msg.msg_flags & (MSG_TRUNC|MSG_CTRUNC)) {
3565 vrrp_log(VRRP_ERR, "vrrpd_sock_handler() %s buffer not "
3566 "big enough", pif->vvi_ifname);
3567 return;
3570 if (af == AF_INET)
3571 (void) vrrpd_process_adv_v4(pif, &msg, len);
3572 else
3573 (void) vrrpd_process_adv_v6(pif, &msg, len);
3577 * Create the socket which is used to receive VRRP packets. Virtual routers
3578 * that configured on the same physical interface share the same socket.
3580 static vrrp_err_t
3581 vrrpd_init_rxsock(vrrp_vr_t *vr)
3583 vrrp_intf_t *pif; /* Physical interface used to recv packets */
3584 struct group_req greq;
3585 struct sockaddr_storage *muladdr;
3586 int af, proto;
3587 int on = 1;
3588 vrrp_err_t err = VRRP_SUCCESS;
3590 vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s)", vr->vvr_conf.vvc_name);
3593 * The RX sockets may already been initialized.
3595 if ((pif = vr->vvr_pif) != NULL) {
3596 vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s) already done on %s",
3597 vr->vvr_conf.vvc_name, pif->vvi_ifname);
3598 assert(pif->vvi_sockfd != -1);
3599 return (VRRP_SUCCESS);
3603 * If no IP addresses configured on the primary interface,
3604 * return failure.
3606 af = vr->vvr_conf.vvc_af;
3607 pif = vrrpd_lookup_if(vr->vvr_conf.vvc_link, af);
3608 if (pif == NULL) {
3609 vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s): no IP address "
3610 "over %s/%s", vr->vvr_conf.vvc_name,
3611 vr->vvr_conf.vvc_link, af_str(af));
3612 return (VRRP_ENOPRIM);
3615 proto = (af == AF_INET ? IPPROTO_IP : IPPROTO_IPV6);
3616 if (pif->vvi_nvr++ == 0) {
3617 assert(pif->vvi_sockfd < 0);
3618 pif->vvi_sockfd = socket(af, SOCK_RAW, IPPROTO_VRRP);
3619 if (pif->vvi_sockfd < 0) {
3620 vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): socket() "
3621 "failed %s", vr->vvr_conf.vvc_name,
3622 strerror(errno));
3623 err = VRRP_ESYS;
3624 goto done;
3628 * Join the multicast group to receive VRRP packets.
3630 if (af == AF_INET) {
3631 muladdr = (struct sockaddr_storage *)
3632 (void *)&vrrp_muladdr4;
3633 } else {
3634 muladdr = (struct sockaddr_storage *)
3635 (void *)&vrrp_muladdr6;
3638 greq.gr_interface = pif->vvi_ifindex;
3639 (void) memcpy(&greq.gr_group, muladdr,
3640 sizeof (struct sockaddr_storage));
3641 if (setsockopt(pif->vvi_sockfd, proto, MCAST_JOIN_GROUP, &greq,
3642 sizeof (struct group_req)) < 0) {
3643 vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): "
3644 "join_group(%d) failed: %s", vr->vvr_conf.vvc_name,
3645 pif->vvi_ifindex, strerror(errno));
3646 err = VRRP_ESYS;
3647 goto done;
3648 } else {
3649 vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s): "
3650 "join_group(%d) succeeded", vr->vvr_conf.vvc_name,
3651 pif->vvi_ifindex);
3655 * Unlike IPv4, the IPv6 raw socket does not pass the IP header
3656 * when a packet is received. Call setsockopt() to receive such
3657 * information.
3659 if (af == AF_INET6) {
3661 * Enable receipt of destination address info
3663 if (setsockopt(pif->vvi_sockfd, proto, IPV6_RECVPKTINFO,
3664 (char *)&on, sizeof (on)) < 0) {
3665 vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): "
3666 "enable recvpktinfo failed: %s",
3667 vr->vvr_conf.vvc_name, strerror(errno));
3668 err = VRRP_ESYS;
3669 goto done;
3673 * Enable receipt of hoplimit info
3675 if (setsockopt(pif->vvi_sockfd, proto,
3676 IPV6_RECVHOPLIMIT, (char *)&on, sizeof (on)) < 0) {
3677 vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): "
3678 "enable recvhoplimit failed: %s",
3679 vr->vvr_conf.vvc_name, strerror(errno));
3680 err = VRRP_ESYS;
3681 goto done;
3685 if ((pif->vvi_eid = iu_register_event(vrrpd_eh,
3686 pif->vvi_sockfd, POLLIN, vrrpd_sock_handler, pif)) == -1) {
3687 vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): "
3688 "iu_register_event() failed",
3689 vr->vvr_conf.vvc_name);
3690 err = VRRP_ESYS;
3691 goto done;
3693 } else {
3694 vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s) over %s already "
3695 "done %d", vr->vvr_conf.vvc_name, pif->vvi_ifname,
3696 pif->vvi_nvr);
3697 assert(IS_PRIMARY_INTF(pif));
3700 done:
3701 vr->vvr_pif = pif;
3702 if (err != VRRP_SUCCESS)
3703 vrrpd_fini_rxsock(vr);
3705 return (err);
3709 * Delete the socket which is used to receive VRRP packets for the given
3710 * VRRP router. Since all virtual routers that configured on the same
3711 * physical interface share the same socket, the socket is only closed
3712 * when the last VRRP router share this socket is deleted.
3714 static void
3715 vrrpd_fini_rxsock(vrrp_vr_t *vr)
3717 vrrp_intf_t *pif = vr->vvr_pif;
3719 vrrp_log(VRRP_DBG1, "vrrpd_fini_rxsock(%s)", vr->vvr_conf.vvc_name);
3721 if (pif == NULL)
3722 return;
3724 if (--pif->vvi_nvr == 0) {
3725 vrrp_log(VRRP_DBG1, "vrrpd_fini_rxsock(%s) over %s",
3726 vr->vvr_conf.vvc_name, pif->vvi_ifname);
3727 (void) iu_unregister_event(vrrpd_eh, pif->vvi_eid, NULL);
3728 (void) close(pif->vvi_sockfd);
3729 pif->vvi_pip = NULL;
3730 pif->vvi_sockfd = -1;
3731 pif->vvi_eid = -1;
3732 } else {
3733 vrrp_log(VRRP_DBG1, "vrrpd_fini_rxsock(%s) over %s %d",
3734 vr->vvr_conf.vvc_name, pif->vvi_ifname, pif->vvi_nvr);
3736 vr->vvr_pif = NULL;
3740 * Create the socket which is used to send VRRP packets. Further, set
3741 * the IFF_NOACCEPT flag based on the VRRP router's accept mode.
3743 static vrrp_err_t
3744 vrrpd_init_txsock(vrrp_vr_t *vr)
3746 int af;
3747 vrrp_intf_t *vif;
3748 vrrp_err_t err;
3750 vrrp_log(VRRP_DBG1, "vrrpd_init_txsock(%s)", vr->vvr_conf.vvc_name);
3752 if (vr->vvr_vif != NULL) {
3753 vrrp_log(VRRP_DBG1, "vrrpd_init_txsock(%s) already done on %s",
3754 vr->vvr_conf.vvc_name, vr->vvr_vif->vvi_ifname);
3755 return (VRRP_SUCCESS);
3758 af = vr->vvr_conf.vvc_af;
3759 if ((vif = vrrpd_lookup_if(vr->vvr_vnic, af)) == NULL) {
3760 vrrp_log(VRRP_DBG1, "vrrpd_init_txsock(%s) no IP address over "
3761 "%s/%s", vr->vvr_conf.vvc_name, vr->vvr_vnic, af_str(af));
3762 return (VRRP_ENOVIRT);
3765 vr->vvr_vif = vif;
3766 if (vr->vvr_conf.vvc_af == AF_INET)
3767 err = vrrpd_init_txsock_v4(vr);
3768 else
3769 err = vrrpd_init_txsock_v6(vr);
3771 if (err != VRRP_SUCCESS)
3772 goto done;
3775 * The interface should start with IFF_NOACCEPT flag not set, only
3776 * call this function when the VRRP router requires IFF_NOACCEPT.
3778 if (!vr->vvr_conf.vvc_accept)
3779 err = vrrpd_set_noaccept(vr, _B_TRUE);
3781 done:
3782 if (err != VRRP_SUCCESS) {
3783 (void) close(vif->vvi_sockfd);
3784 vif->vvi_sockfd = -1;
3785 vr->vvr_vif = NULL;
3788 return (err);
3792 * Create the IPv4 socket which is used to send VRRP packets. Note that
3793 * the destination MAC address of VRRP advertisement must be the virtual
3794 * MAC address, so we specify the output interface to be the specific VNIC.
3796 static vrrp_err_t
3797 vrrpd_init_txsock_v4(vrrp_vr_t *vr)
3799 vrrp_intf_t *vif; /* VNIC interface used to send packets */
3800 vrrp_ip_t *vip; /* The first IP over the VNIC */
3801 int on = 1;
3802 char off = 0;
3803 vrrp_err_t err = VRRP_SUCCESS;
3804 char abuf[INET6_ADDRSTRLEN];
3806 vif = vr->vvr_vif;
3807 assert(vr->vvr_conf.vvc_af == AF_INET);
3808 assert(vif != NULL);
3810 vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v4(%s) over %s",
3811 vr->vvr_conf.vvc_name, vif->vvi_ifname);
3813 if (vif->vvi_sockfd != -1) {
3814 vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v4(%s) already done "
3815 "over %s", vr->vvr_conf.vvc_name, vif->vvi_ifname);
3816 return (VRRP_SUCCESS);
3819 vif->vvi_sockfd = socket(vif->vvi_af, SOCK_RAW, IPPROTO_VRRP);
3820 if (vif->vvi_sockfd < 0) {
3821 vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): socket() "
3822 "failed: %s", vr->vvr_conf.vvc_name, strerror(errno));
3823 err = VRRP_ESYS;
3824 goto done;
3828 * Include the IP header, so that we can specify the IP address/ttl.
3830 if (setsockopt(vif->vvi_sockfd, IPPROTO_IP, IP_HDRINCL, (char *)&on,
3831 sizeof (on)) < 0) {
3832 vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): ip_hdrincl "
3833 "failed: %s", vr->vvr_conf.vvc_name, strerror(errno));
3834 err = VRRP_ESYS;
3835 goto done;
3839 * Disable multicast loopback.
3841 if (setsockopt(vif->vvi_sockfd, IPPROTO_IP, IP_MULTICAST_LOOP, &off,
3842 sizeof (char)) == -1) {
3843 vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): disable "
3844 "multicast_loop failed: %s", vr->vvr_conf.vvc_name,
3845 strerror(errno));
3846 err = VRRP_ESYS;
3847 goto done;
3850 vip = TAILQ_FIRST(&vif->vvi_iplist);
3851 /* LINTED E_CONSTANT_CONDITION */
3852 VRRPADDR2STR(vif->vvi_af, &vip->vip_addr, abuf, INET6_ADDRSTRLEN,
3853 _B_FALSE);
3856 * Set the output interface to send the VRRP packet.
3858 if (setsockopt(vif->vvi_sockfd, IPPROTO_IP, IP_MULTICAST_IF,
3859 &vip->vip_addr.in4.sin_addr, sizeof (struct in_addr)) < 0) {
3860 vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): multcast_if(%s) "
3861 "failed: %s", vr->vvr_conf.vvc_name, abuf, strerror(errno));
3862 err = VRRP_ESYS;
3863 } else {
3864 vrrp_log(VRRP_DBG0, "vrrpd_init_txsock_v4(%s): multcast_if(%s) "
3865 "succeed", vr->vvr_conf.vvc_name, abuf);
3868 done:
3869 if (err != VRRP_SUCCESS) {
3870 (void) close(vif->vvi_sockfd);
3871 vif->vvi_sockfd = -1;
3874 return (err);
3878 * Create the IPv6 socket which is used to send VRRP packets. Note that
3879 * the destination must be the virtual MAC address, so we specify the output
3880 * interface to be the specific VNIC.
3882 static vrrp_err_t
3883 vrrpd_init_txsock_v6(vrrp_vr_t *vr)
3885 vrrp_intf_t *vif; /* VNIC interface used to send packets */
3886 int off = 0, ttl = VRRP_IP_TTL;
3887 vrrp_err_t err = VRRP_SUCCESS;
3889 vif = vr->vvr_vif;
3890 assert(vr->vvr_conf.vvc_af == AF_INET6);
3891 assert(vif != NULL);
3893 vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v6(%s) over %s",
3894 vr->vvr_conf.vvc_name, vif->vvi_ifname);
3896 if (vif->vvi_sockfd != -1) {
3897 vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v6(%s) already done "
3898 "over %s", vr->vvr_conf.vvc_name, vif->vvi_ifname);
3899 return (VRRP_SUCCESS);
3902 vif->vvi_sockfd = socket(vif->vvi_af, SOCK_RAW, IPPROTO_VRRP);
3903 if (vif->vvi_sockfd < 0) {
3904 vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): socket() "
3905 "failed: %s", vr->vvr_conf.vvc_name, strerror(errno));
3906 err = VRRP_ESYS;
3907 goto done;
3911 * Disable multicast loopback.
3913 if (setsockopt(vif->vvi_sockfd, IPPROTO_IPV6, IPV6_MULTICAST_LOOP,
3914 &off, sizeof (int)) == -1) {
3915 vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): disable "
3916 "multicast_loop failed: %s", vr->vvr_conf.vvc_name,
3917 strerror(errno));
3918 err = VRRP_ESYS;
3919 goto done;
3923 * Set the multicast TTL.
3925 if (setsockopt(vif->vvi_sockfd, IPPROTO_IPV6, IPV6_MULTICAST_HOPS,
3926 &ttl, sizeof (int)) == -1) {
3927 vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): enable "
3928 "multicast_hops %d failed: %s", vr->vvr_conf.vvc_name,
3929 ttl, strerror(errno));
3930 err = VRRP_ESYS;
3931 goto done;
3935 * Set the output interface to send the VRRP packet.
3937 if (setsockopt(vif->vvi_sockfd, IPPROTO_IPV6, IPV6_MULTICAST_IF,
3938 &vif->vvi_ifindex, sizeof (uint32_t)) < 0) {
3939 vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): multicast_if(%d) "
3940 "failed: %s", vr->vvr_conf.vvc_name, vif->vvi_ifindex,
3941 strerror(errno));
3942 err = VRRP_ESYS;
3943 } else {
3944 vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v6(%s): multicast_if(%d)"
3945 " succeed", vr->vvr_conf.vvc_name, vif->vvi_ifindex);
3948 done:
3949 if (err != VRRP_SUCCESS) {
3950 (void) close(vif->vvi_sockfd);
3951 vif->vvi_sockfd = -1;
3954 return (err);
3958 * Delete the socket which is used to send VRRP packets. Further, clear
3959 * the IFF_NOACCEPT flag based on the VRRP router's accept mode.
3961 static void
3962 vrrpd_fini_txsock(vrrp_vr_t *vr)
3964 vrrp_intf_t *vif = vr->vvr_vif;
3966 vrrp_log(VRRP_DBG1, "vrrpd_fini_txsock(%s)", vr->vvr_conf.vvc_name);
3968 if (vif != NULL) {
3969 if (!vr->vvr_conf.vvc_accept)
3970 (void) vrrpd_set_noaccept(vr, _B_FALSE);
3971 (void) close(vif->vvi_sockfd);
3972 vif->vvi_sockfd = -1;
3973 vr->vvr_vif = NULL;
/*
 * Given the pseudo header checksum value (sum), calculate the checksum
 * over the rest of the VRRP packet.
 *
 * sum:  partial sum of the 16-bit words of the pseudo header
 * plen: number of bytes at p to add to the sum
 * p:    start of the data; it is read byte-wise, so no alignment is required
 *
 * Returns the one's complement of the folded sum. A result of zero is
 * returned as 0xffff.
 */
static uint16_t
in_cksum(int sum, uint16_t plen, void *p)
{
	const unsigned char	*bytes = p;
	/* Unsigned accumulator: adding many words must not overflow an int. */
	uint32_t		acc = (uint32_t)sum;
	size_t			nleft = plen;
	uint16_t		word;
	uint16_t		answer;

	while (nleft > 1) {
		/* memcpy avoids a misaligned 16-bit load through a cast. */
		(void) memcpy(&word, bytes, sizeof (word));
		acc += word;
		bytes += sizeof (word);
		nleft -= sizeof (word);
	}

	/* mop up an odd byte, if necessary */
	if (nleft == 1) {
		word = 0;
		(void) memcpy(&word, bytes, 1);
		acc += word;
	}

	/*
	 * add back carry outs from top 16 bits to low 16 bits
	 */
	acc = (acc >> 16) + (acc & 0xffff);	/* add hi 16 to low 16 */
	acc += (acc >> 16);			/* add carry */
	answer = (uint16_t)~acc;		/* truncate to 16 bits */
	return (answer == 0 ? (uint16_t)~0 : answer);
}
/*
 * Pseudo header prepended (for checksum purposes only) to an IPv4 VRRP
 * packet; see vrrp_cksum4().
 */
struct pshv4 {
	struct in_addr	ph4_src;	/* source address */
	struct in_addr	ph4_dst;	/* destination address */
	uint8_t		ph4_zero;	/* always zero */
	uint8_t		ph4_protocol;	/* protocol used, IPPROTO_VRRP */
	uint16_t	ph4_len;	/* VRRP payload len */
};
4021 * Checksum routine for VRRP checksum. Note that plen is the upper-layer
4022 * packet length (in the host byte order), and both IP source and destination
4023 * addresses are in the network byte order.
4025 static uint16_t
4026 vrrp_cksum4(struct in_addr *src, struct in_addr *dst, uint16_t plen,
4027 vrrp_pkt_t *vp)
4029 struct pshv4 ph4;
4030 int nleft;
4031 uint16_t *w;
4032 int sum = 0;
4034 ph4.ph4_src = *src;
4035 ph4.ph4_dst = *dst;
4036 ph4.ph4_zero = 0;
4037 ph4.ph4_protocol = IPPROTO_VRRP;
4038 ph4.ph4_len = htons(plen);
4041 * Our algorithm is simple, using a 32 bit accumulator (sum),
4042 * we add sequential 16 bit words to it, and at the end, fold
4043 * back all the carry bits from the top 16 bits into the lower
4044 * 16 bits.
4046 nleft = sizeof (struct pshv4);
4047 w = (uint16_t *)&ph4;
4048 while (nleft > 0) {
4049 sum += *w++;
4050 nleft -= 2;
4053 return (in_cksum(sum, plen, vp));
/*
 * Pseudo header prepended (for checksum purposes only) to an IPv6 VRRP
 * packet; see vrrp_cksum6().
 */
struct pshv6 {
	struct in6_addr	ph6_src;		/* source address */
	struct in6_addr	ph6_dst;		/* destination address */
	uint32_t	ph6_len;		/* VRRP payload len */
	uint32_t	ph6_zero : 24;		/* always zero */
	uint32_t	ph6_protocol : 8;	/* protocol, IPPROTO_VRRP */
};
4066 * Checksum routine for VRRP checksum. Note that plen is the upper-layer
4067 * packet length (in the host byte order), and both IP source and destination
4068 * addresses are in the network byte order.
4070 static uint16_t
4071 vrrp_cksum6(struct in6_addr *src, struct in6_addr *dst, uint16_t plen,
4072 vrrp_pkt_t *vp)
4074 struct pshv6 ph6;
4075 int nleft;
4076 uint16_t *w;
4077 int sum = 0;
4079 ph6.ph6_src = *src;
4080 ph6.ph6_dst = *dst;
4081 ph6.ph6_zero = 0;
4082 ph6.ph6_protocol = IPPROTO_VRRP;
4083 ph6.ph6_len = htonl((uint32_t)plen);
4086 * Our algorithm is simple, using a 32 bit accumulator (sum),
4087 * we add sequential 16 bit words to it, and at the end, fold
4088 * back all the carry bits from the top 16 bits into the lower
4089 * 16 bits.
4091 nleft = sizeof (struct pshv6);
4092 w = (uint16_t *)&ph6;
4093 while (nleft > 0) {
4094 sum += *w++;
4095 nleft -= 2;
4098 return (in_cksum(sum, plen, vp));
4101 vrrp_err_t
4102 vrrpd_state_i2m(vrrp_vr_t *vr)
4104 vrrp_err_t err;
4106 vrrp_log(VRRP_DBG1, "vrrpd_state_i2m(%s)", vr->vvr_conf.vvc_name);
4108 vrrpd_state_trans(VRRP_STATE_INIT, VRRP_STATE_MASTER, vr);
4109 if ((err = vrrpd_virtualip_update(vr, _B_FALSE)) != VRRP_SUCCESS)
4110 return (err);
4112 (void) vrrpd_send_adv(vr, _B_FALSE);
4114 vr->vvr_err = VRRP_SUCCESS;
4115 vr->vvr_timeout = vr->vvr_conf.vvc_adver_int;
4116 if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
4117 vr->vvr_timeout, vrrp_adv_timeout, vr)) == -1) {
4118 vrrp_log(VRRP_ERR, "vrrpd_state_i2m(): unable to start timer");
4119 return (VRRP_ESYS);
4120 } else {
4121 vrrp_log(VRRP_DBG1, "vrrpd_state_i2m(%s): start "
4122 "vrrp_adv_timeout(%d)", vr->vvr_conf.vvc_name,
4123 vr->vvr_timeout);
4125 return (VRRP_SUCCESS);
4128 vrrp_err_t
4129 vrrpd_state_i2b(vrrp_vr_t *vr)
4131 vrrp_err_t err;
4133 vrrp_log(VRRP_DBG1, "vrrpd_state_i2b(%s)", vr->vvr_conf.vvc_name);
4135 vrrpd_state_trans(VRRP_STATE_INIT, VRRP_STATE_BACKUP, vr);
4136 if ((err = vrrpd_virtualip_update(vr, _B_FALSE)) != VRRP_SUCCESS)
4137 return (err);
4140 * Reinitialize the Master advertisement interval to be the configured
4141 * value.
4143 vr->vvr_err = VRRP_SUCCESS;
4144 vr->vvr_master_adver_int = vr->vvr_conf.vvc_adver_int;
4145 vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr);
4146 if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
4147 vr->vvr_timeout, vrrp_b2m_timeout, vr)) == -1) {
4148 vrrp_log(VRRP_ERR, "vrrpd_state_i2b(): unable to set timer");
4149 return (VRRP_ESYS);
4150 } else {
4151 vrrp_log(VRRP_DBG1, "vrrpd_state_i2b(%s): start "
4152 "vrrp_b2m_timeout(%d)", vr->vvr_conf.vvc_name,
4153 vr->vvr_timeout);
4155 return (VRRP_SUCCESS);
4158 void
4159 vrrpd_state_m2i(vrrp_vr_t *vr)
4161 vrrp_log(VRRP_DBG1, "vrrpd_state_m2i(%s)", vr->vvr_conf.vvc_name);
4163 vrrpd_state_trans(VRRP_STATE_MASTER, VRRP_STATE_INIT, vr);
4164 (void) vrrpd_virtualip_update(vr, _B_TRUE);
4165 bzero(&vr->vvr_peer, sizeof (vrrp_peer_t));
4166 (void) iu_cancel_timer(vrrpd_timerq, vr->vvr_timer_id, NULL);
4169 void
4170 vrrpd_state_b2i(vrrp_vr_t *vr)
4172 vrrp_log(VRRP_DBG1, "vrrpd_state_b2i(%s)", vr->vvr_conf.vvc_name);
4174 bzero(&vr->vvr_peer, sizeof (vrrp_peer_t));
4175 (void) iu_cancel_timer(vrrpd_timerq, vr->vvr_timer_id, NULL);
4176 vrrpd_state_trans(VRRP_STATE_BACKUP, VRRP_STATE_INIT, vr);
4177 (void) vrrpd_virtualip_update(vr, _B_TRUE);
4180 /* ARGSUSED */
4181 static void
4182 vrrp_b2m_timeout(iu_tq_t *tq, void *arg)
4184 vrrp_vr_t *vr = (vrrp_vr_t *)arg;
4186 vrrp_log(VRRP_DBG1, "vrrp_b2m_timeout(%s)", vr->vvr_conf.vvc_name);
4187 (void) vrrpd_state_b2m(vr);
4190 /* ARGSUSED */
4191 static void
4192 vrrp_adv_timeout(iu_tq_t *tq, void *arg)
4194 vrrp_vr_t *vr = (vrrp_vr_t *)arg;
4196 vrrp_log(VRRP_DBG1, "vrrp_adv_timeout(%s)", vr->vvr_conf.vvc_name);
4198 (void) vrrpd_send_adv(vr, _B_FALSE);
4199 if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
4200 vr->vvr_timeout, vrrp_adv_timeout, vr)) == -1) {
4201 vrrp_log(VRRP_ERR, "vrrp_adv_timeout(%s): start timer failed",
4202 vr->vvr_conf.vvc_name);
4203 } else {
4204 vrrp_log(VRRP_DBG1, "vrrp_adv_timeout(%s): start "
4205 "vrrp_adv_timeout(%d)", vr->vvr_conf.vvc_name,
4206 vr->vvr_timeout);
4210 vrrp_err_t
4211 vrrpd_state_b2m(vrrp_vr_t *vr)
4213 vrrp_err_t err;
4215 vrrp_log(VRRP_DBG1, "vrrpd_state_b2m(%s)", vr->vvr_conf.vvc_name);
4217 vrrpd_state_trans(VRRP_STATE_BACKUP, VRRP_STATE_MASTER, vr);
4218 if ((err = vrrpd_virtualip_update(vr, _B_FALSE)) != VRRP_SUCCESS)
4219 return (err);
4220 (void) vrrpd_send_adv(vr, _B_FALSE);
4222 vr->vvr_timeout = vr->vvr_conf.vvc_adver_int;
4223 if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
4224 vr->vvr_timeout, vrrp_adv_timeout, vr)) == -1) {
4225 vrrp_log(VRRP_ERR, "vrrpd_state_b2m(%s): start timer failed",
4226 vr->vvr_conf.vvc_name);
4227 return (VRRP_ESYS);
4228 } else {
4229 vrrp_log(VRRP_DBG1, "vrrpd_state_b2m(%s): start "
4230 "vrrp_adv_timeout(%d)", vr->vvr_conf.vvc_name,
4231 vr->vvr_timeout);
4233 return (VRRP_SUCCESS);
4236 vrrp_err_t
4237 vrrpd_state_m2b(vrrp_vr_t *vr)
4239 vrrp_err_t err;
4241 vrrp_log(VRRP_DBG1, "vrrpd_state_m2b(%s)", vr->vvr_conf.vvc_name);
4243 vrrpd_state_trans(VRRP_STATE_MASTER, VRRP_STATE_BACKUP, vr);
4244 if ((err = vrrpd_virtualip_update(vr, _B_FALSE)) != VRRP_SUCCESS)
4245 return (err);
4248 * Cancel the adver_timer.
4250 vr->vvr_master_adver_int = vr->vvr_peer_adver_int;
4251 (void) iu_cancel_timer(vrrpd_timerq, vr->vvr_timer_id, NULL);
4252 vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr);
4253 if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
4254 vr->vvr_timeout, vrrp_b2m_timeout, vr)) == -1) {
4255 vrrp_log(VRRP_ERR, "vrrpd_state_m2b(%s): start timer failed",
4256 vr->vvr_conf.vvc_name);
4257 } else {
4258 vrrp_log(VRRP_DBG1, "vrrpd_state_m2b(%s) start "
4259 "vrrp_b2m_timeout(%d)", vr->vvr_conf.vvc_name,
4260 vr->vvr_timeout);
4262 return (VRRP_SUCCESS);
4266 * Set the IFF_NOACCESS flag on the VNIC interface of the VRRP router
4267 * based on its access mode.
4269 static vrrp_err_t
4270 vrrpd_set_noaccept(vrrp_vr_t *vr, boolean_t on)
4272 vrrp_intf_t *vif = vr->vvr_vif;
4273 uint64_t curr_flags;
4274 struct lifreq lifr;
4275 int s;
4277 vrrp_log(VRRP_DBG1, "vrrpd_set_noaccept(%s, %s)",
4278 vr->vvr_conf.vvc_name, on ? "on" : "off");
4281 * Possibly no virtual address exists on this VRRP router yet.
4283 if (vif == NULL)
4284 return (VRRP_SUCCESS);
4286 vrrp_log(VRRP_DBG1, "vrrpd_set_noaccept(%s, %s)",
4287 vif->vvi_ifname, vrrp_state2str(vr->vvr_state));
4289 s = (vif->vvi_af == AF_INET) ? vrrpd_ctlsock_fd : vrrpd_ctlsock6_fd;
4290 (void) strncpy(lifr.lifr_name, vif->vvi_ifname,
4291 sizeof (lifr.lifr_name));
4292 if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) {
4293 if (errno != ENXIO && errno != ENOENT) {
4294 vrrp_log(VRRP_ERR, "vrrpd_set_noaccept(): "
4295 "SIOCGLIFFLAGS on %s failed: %s",
4296 vif->vvi_ifname, strerror(errno));
4298 return (VRRP_ESYS);
4301 curr_flags = lifr.lifr_flags;
4302 if (on)
4303 lifr.lifr_flags |= IFF_NOACCEPT;
4304 else
4305 lifr.lifr_flags &= ~IFF_NOACCEPT;
4307 if (lifr.lifr_flags != curr_flags) {
4308 if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) {
4309 if (errno != ENXIO && errno != ENOENT) {
4310 vrrp_log(VRRP_ERR, "vrrpd_set_noaccept(%s): "
4311 "SIOCSLIFFLAGS 0x%llx on %s failed: %s",
4312 on ? "no_accept" : "accept",
4313 lifr.lifr_flags, vif->vvi_ifname,
4314 strerror(errno));
4316 return (VRRP_ESYS);
4319 return (VRRP_SUCCESS);
4322 static vrrp_err_t
4323 vrrpd_virtualip_updateone(vrrp_intf_t *vif, vrrp_ip_t *ip, boolean_t checkonly)
4325 vrrp_state_t state = vif->vvi_vr_state;
4326 struct lifreq lifr;
4327 char abuf[INET6_ADDRSTRLEN];
4328 int af = vif->vvi_af;
4329 uint64_t curr_flags;
4330 int s;
4332 assert(IS_VIRTUAL_INTF(vif));
4334 /* LINTED E_CONSTANT_CONDITION */
4335 VRRPADDR2STR(af, &ip->vip_addr, abuf, INET6_ADDRSTRLEN, _B_FALSE);
4336 vrrp_log(VRRP_DBG1, "vrrpd_virtualip_updateone(%s, %s%s)",
4337 vif->vvi_ifname, abuf, checkonly ? ", checkonly" : "");
4339 s = (af == AF_INET) ? vrrpd_ctlsock_fd : vrrpd_ctlsock6_fd;
4340 (void) strncpy(lifr.lifr_name, ip->vip_lifname,
4341 sizeof (lifr.lifr_name));
4342 if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) {
4343 if (errno != ENXIO && errno != ENOENT) {
4344 vrrp_log(VRRP_ERR, "vrrpd_virtualip_updateone(%s): "
4345 "SIOCGLIFFLAGS on %s/%s failed: %s",
4346 vif->vvi_ifname, lifr.lifr_name, abuf,
4347 strerror(errno));
4349 return (VRRP_ESYS);
4352 curr_flags = lifr.lifr_flags;
4353 if (state == VRRP_STATE_MASTER)
4354 lifr.lifr_flags |= IFF_UP;
4355 else
4356 lifr.lifr_flags &= ~IFF_UP;
4358 if (lifr.lifr_flags == curr_flags)
4359 return (VRRP_SUCCESS);
4361 if (checkonly) {
4362 vrrp_log(VRRP_ERR, "VRRP virtual IP %s/%s was brought %s",
4363 ip->vip_lifname, abuf,
4364 state == VRRP_STATE_MASTER ? "down" : "up");
4365 return (VRRP_ESYS);
4366 } else if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) {
4367 if (errno != ENXIO && errno != ENOENT) {
4368 vrrp_log(VRRP_ERR, "vrrpd_virtualip_updateone(%s, %s): "
4369 "bring %s %s/%s failed: %s",
4370 vif->vvi_ifname, vrrp_state2str(state),
4371 state == VRRP_STATE_MASTER ? "up" : "down",
4372 ip->vip_lifname, abuf, strerror(errno));
4374 return (VRRP_ESYS);
4376 return (VRRP_SUCCESS);
4379 static vrrp_err_t
4380 vrrpd_virtualip_update(vrrp_vr_t *vr, boolean_t checkonly)
4382 vrrp_state_t state;
4383 vrrp_intf_t *vif = vr->vvr_vif;
4384 vrrp_ip_t *ip, *nextip;
4385 char abuf[INET6_ADDRSTRLEN];
4386 vrrp_err_t err;
4388 vrrp_log(VRRP_DBG1, "vrrpd_virtualip_update(%s, %s, %s)%s",
4389 vr->vvr_conf.vvc_name, vrrp_state2str(vr->vvr_state),
4390 vif->vvi_ifname, checkonly ? " checkonly" : "");
4392 state = vr->vvr_state;
4393 assert(vif != NULL);
4394 assert(IS_VIRTUAL_INTF(vif));
4395 assert(vif->vvi_vr_state != state);
4396 vif->vvi_vr_state = state;
4397 for (ip = TAILQ_FIRST(&vif->vvi_iplist); ip != NULL; ip = nextip) {
4398 nextip = TAILQ_NEXT(ip, vip_next);
4399 err = vrrpd_virtualip_updateone(vif, ip, _B_FALSE);
4400 if (!checkonly && err != VRRP_SUCCESS) {
4401 /* LINTED E_CONSTANT_CONDITION */
4402 VRRPADDR2STR(vif->vvi_af, &ip->vip_addr, abuf,
4403 INET6_ADDRSTRLEN, _B_FALSE);
4404 vrrp_log(VRRP_DBG1, "vrrpd_virtualip_update() update "
4405 "%s over %s failed", abuf, vif->vvi_ifname);
4406 vrrpd_delete_ip(vif, ip);
4411 * The IP address is deleted when it is failed to be brought
4412 * up. If no IP addresses are left, delete this interface.
4414 if (!checkonly && TAILQ_EMPTY(&vif->vvi_iplist)) {
4415 vrrp_log(VRRP_DBG0, "vrrpd_virtualip_update(): "
4416 "no IP left over %s", vif->vvi_ifname);
4417 vrrpd_delete_if(vif, _B_TRUE);
4418 return (VRRP_ENOVIRT);
4420 return (VRRP_SUCCESS);
4423 void
4424 vrrpd_state_trans(vrrp_state_t prev_s, vrrp_state_t s, vrrp_vr_t *vr)
4426 vrrp_log(VRRP_DBG1, "vrrpd_state_trans(%s): %s --> %s",
4427 vr->vvr_conf.vvc_name, vrrp_state2str(prev_s), vrrp_state2str(s));
4429 assert(vr->vvr_state == prev_s);
4430 vr->vvr_state = s;
4431 vr->vvr_prev_state = prev_s;
4432 (void) gettimeofday(&vr->vvr_st_time, NULL);
4433 (void) vrrpd_post_event(vr->vvr_conf.vvc_name, prev_s, s);
4436 static int
4437 vrrpd_post_event(const char *name, vrrp_state_t prev_st, vrrp_state_t st)
4439 sysevent_id_t eid;
4440 nvlist_t *nvl = NULL;
4443 * sysevent is not supported in the non-global zone
4445 if (getzoneid() != GLOBAL_ZONEID)
4446 return (0);
4448 if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
4449 goto failed;
4451 if (nvlist_add_uint8(nvl, VRRP_EVENT_VERSION,
4452 VRRP_EVENT_CUR_VERSION) != 0)
4453 goto failed;
4455 if (nvlist_add_string(nvl, VRRP_EVENT_ROUTER_NAME, name) != 0)
4456 goto failed;
4458 if (nvlist_add_uint8(nvl, VRRP_EVENT_STATE, st) != 0)
4459 goto failed;
4461 if (nvlist_add_uint8(nvl, VRRP_EVENT_PREV_STATE, prev_st) != 0)
4462 goto failed;
4464 if (sysevent_post_event(EC_VRRP, ESC_VRRP_STATE_CHANGE,
4465 SUNW_VENDOR, VRRP_EVENT_PUBLISHER, nvl, &eid) == 0) {
4466 nvlist_free(nvl);
4467 return (0);
4470 failed:
4471 vrrp_log(VRRP_ERR, "vrrpd_post_event(): `state change (%s --> %s)' "
4472 "sysevent posting failed: %s", vrrp_state2str(prev_st),
4473 vrrp_state2str(st), strerror(errno));
4475 nvlist_free(nvl);
4476 return (-1);
/*
 * timeval processing functions
 */

/*
 * Convert tv to milliseconds, rounded to the nearest millisecond (a
 * remainder of 500 microseconds or more rounds up). The rounding is done
 * in integer arithmetic. The result is narrowed to int, so tv is expected
 * to be an interval rather than an absolute time.
 */
static int
timeval_to_milli(struct timeval tv)
{
	return ((int)(tv.tv_sec * 1000 + (tv.tv_usec + 500) / 1000));
}
/*
 * Return t1 - t2. When the microsecond difference is negative, one second
 * is borrowed so that tv_usec of the result is not negative.
 */
static struct timeval
timeval_delta(struct timeval t1, struct timeval t2)
{
	struct timeval	diff;

	diff.tv_sec = t1.tv_sec - t2.tv_sec;
	diff.tv_usec = t1.tv_usec - t2.tv_usec;

	if (diff.tv_usec >= 0)
		return (diff);

	diff.tv_sec -= 1;
	diff.tv_usec += 1000000;
	return (diff);
}
4503 * print error messages to the terminal or to syslog
4505 static void
4506 vrrp_log(int level, char *message, ...)
4508 va_list ap;
4509 int log_level = -1;
4511 va_start(ap, message);
4513 if (vrrp_logflag == 0) {
4514 if (level <= vrrp_debug_level) {
4516 * VRRP_ERR goes to stderr, others go to stdout
4518 FILE *out = (level <= VRRP_ERR) ? stderr : stdout;
4519 (void) fprintf(out, "vrrpd: ");
4520 /* LINTED: E_SEC_PRINTF_VAR_FMT */
4521 (void) vfprintf(out, message, ap);
4522 (void) fprintf(out, "\n");
4523 (void) fflush(out);
4525 va_end(ap);
4526 return;
4530 * translate VRRP_* to LOG_*
4532 switch (level) {
4533 case VRRP_ERR:
4534 log_level = LOG_ERR;
4535 break;
4536 case VRRP_WARNING:
4537 log_level = LOG_WARNING;
4538 break;
4539 case VRRP_NOTICE:
4540 log_level = LOG_NOTICE;
4541 break;
4542 case VRRP_DBG0:
4543 log_level = LOG_INFO;
4544 break;
4545 default:
4546 log_level = LOG_DEBUG;
4547 break;
4550 /* LINTED: E_SEC_PRINTF_VAR_FMT */
4551 (void) vsyslog(log_level, message, ap);
4552 va_end(ap);