4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
27 #include "mpd_tables.h"
/*
 * Global list of phyints, phyint instances, phyint groups and the anonymous
 * group; the latter is initialized in phyint_init().
 *
 * The three lists start out empty (NULL).  phyint_anongroup has no explicit
 * initializer, so it is also NULL until phyint_init() assigns it.
 */
struct phyint		*phyints = NULL;
struct phyint_instance	*phyint_instances = NULL;
struct phyint_group	*phyint_groups = NULL;
struct phyint_group	*phyint_anongroup;

/*
 * Grouplist signature; initialized in phyint_init().
 */
static uint64_t		phyint_grouplistsig;
43 static void phyint_inst_insert(struct phyint_instance
*pii
);
44 static void phyint_inst_print(struct phyint_instance
*pii
);
46 static void phyint_insert(struct phyint
*pi
, struct phyint_group
*pg
);
47 static void phyint_delete(struct phyint
*pi
);
48 static boolean_t
phyint_is_usable(struct phyint
*pi
);
50 static void logint_print(struct logint
*li
);
51 static void logint_insert(struct phyint_instance
*pii
, struct logint
*li
);
52 static struct logint
*logint_lookup(struct phyint_instance
*pii
, char *li_name
);
54 static void target_print(struct target
*tg
);
55 static void target_insert(struct phyint_instance
*pii
, struct target
*tg
);
56 static struct target
*target_first(struct phyint_instance
*pii
);
57 static struct target
*target_select_best(struct phyint_instance
*pii
);
58 static void target_flush_hosts(struct phyint_group
*pg
);
60 static void reset_pii_probes(struct phyint_instance
*pii
, struct target
*tg
);
62 static boolean_t
phyint_inst_v6_sockinit(struct phyint_instance
*pii
);
63 static boolean_t
phyint_inst_v4_sockinit(struct phyint_instance
*pii
);
65 static int phyint_state_event(struct phyint_group
*pg
, struct phyint
*pi
);
66 static int phyint_group_state_event(struct phyint_group
*pg
);
67 static int phyint_group_change_event(struct phyint_group
*pg
, ipmp_group_op_t
);
68 static int phyint_group_member_event(struct phyint_group
*pg
, struct phyint
*pi
,
/* Remaining file-local helpers: logint accounting and signature generation */
static int logint_upcount(struct phyint *pi);
static uint64_t gensig(void);
74 /* Initialize any per-file global state. Returns 0 on success, -1 on failure */
78 phyint_grouplistsig
= gensig();
79 if (track_all_phyints
) {
80 phyint_anongroup
= phyint_group_create("");
81 if (phyint_anongroup
== NULL
)
83 phyint_group_insert(phyint_anongroup
);
88 /* Return the phyint with the given name */
90 phyint_lookup(const char *name
)
95 logdebug("phyint_lookup(%s)\n", name
);
97 for (pi
= phyints
; pi
!= NULL
; pi
= pi
->pi_next
) {
98 if (strncmp(pi
->pi_name
, name
, sizeof (pi
->pi_name
)) == 0)
105 * Lookup a phyint in the group that has the same hardware address as `pi', or
106 * NULL if there's none. If `online_only' is set, then only online phyints
107 * are considered when matching. Otherwise, phyints that had been offlined
108 * due to a duplicate hardware address will also be considered.
110 static struct phyint
*
111 phyint_lookup_hwaddr(struct phyint
*pi
, boolean_t online_only
)
115 if (pi
->pi_group
== phyint_anongroup
)
118 for (pi2
= pi
->pi_group
->pg_phyint
; pi2
!= NULL
; pi2
= pi2
->pi_pgnext
) {
123 * NOTE: even when online_only is B_FALSE, we ignore phyints
124 * that are administratively offline (rather than offline
125 * because they're dups); when they're brought back online,
126 * they'll be flagged as dups if need be.
128 if (pi2
->pi_state
== PI_OFFLINE
&&
129 (online_only
|| !pi2
->pi_hwaddrdup
))
132 if (pi2
->pi_hwaddrlen
== pi
->pi_hwaddrlen
&&
133 bcmp(pi2
->pi_hwaddr
, pi
->pi_hwaddr
, pi
->pi_hwaddrlen
) == 0)
140 * Respond to DLPI notifications. Currently, this only processes physical
141 * address changes for the phyint passed via `arg' by onlining or offlining
142 * phyints in the group.
146 phyint_link_notify(dlpi_handle_t dh
, dlpi_notifyinfo_t
*dnip
, void *arg
)
148 struct phyint
*pi
= arg
;
149 struct phyint
*oduppi
= NULL
, *duppi
= NULL
;
151 assert((dnip
->dni_note
& pi
->pi_notes
) != 0);
153 if (dnip
->dni_note
!= DL_NOTE_PHYS_ADDR
)
156 assert(dnip
->dni_physaddrlen
<= DLPI_PHYSADDR_MAX
);
159 * If our hardware address hasn't changed, there's nothing to do.
161 if (pi
->pi_hwaddrlen
== dnip
->dni_physaddrlen
&&
162 bcmp(pi
->pi_hwaddr
, dnip
->dni_physaddr
, pi
->pi_hwaddrlen
) == 0)
165 oduppi
= phyint_lookup_hwaddr(pi
, _B_FALSE
);
166 pi
->pi_hwaddrlen
= dnip
->dni_physaddrlen
;
167 (void) memcpy(pi
->pi_hwaddr
, dnip
->dni_physaddr
, pi
->pi_hwaddrlen
);
168 duppi
= phyint_lookup_hwaddr(pi
, _B_FALSE
);
170 if (oduppi
!= NULL
|| pi
->pi_hwaddrdup
) {
172 * Our old hardware address was a duplicate. If we'd been
173 * offlined because of it, and our new hardware address is not
174 * a duplicate, then bring us online. Otherwise, `oduppi'
175 * must've been the one brought offline; bring it online.
177 if (pi
->pi_hwaddrdup
) {
179 (void) phyint_undo_offline(pi
);
181 assert(oduppi
->pi_hwaddrdup
);
182 (void) phyint_undo_offline(oduppi
);
186 if (duppi
!= NULL
&& !pi
->pi_hwaddrdup
) {
188 * Our new hardware address was a duplicate and we're not
189 * yet flagged as a duplicate; bring us offline.
191 pi
->pi_hwaddrdup
= _B_TRUE
;
192 (void) phyint_offline(pi
, 0);
197 * Initialize information about the underlying link for `pi', and set us
198 * up to be notified about future changes. Returns _B_TRUE on success.
201 phyint_link_init(struct phyint
*pi
)
209 retval
= dlpi_open(pi
->pi_name
, &pi
->pi_dh
, 0);
210 if (retval
!= DLPI_SUCCESS
) {
212 errmsg
= "cannot open";
216 pi
->pi_hwaddrlen
= DLPI_PHYSADDR_MAX
;
217 retval
= dlpi_get_physaddr(pi
->pi_dh
, DL_CURR_PHYS_ADDR
, pi
->pi_hwaddr
,
219 if (retval
!= DLPI_SUCCESS
) {
220 errmsg
= "cannot get hardware address";
225 * Check if the link supports DLPI link state notifications. For
226 * historical reasons, the actual changes are tracked through routing
227 * sockets, so we immediately disable the notification upon success.
229 notes
= DL_NOTE_LINK_UP
| DL_NOTE_LINK_DOWN
;
230 retval
= dlpi_enabnotify(pi
->pi_dh
, notes
, phyint_link_notify
, pi
, &id
);
231 if (retval
== DLPI_SUCCESS
) {
232 (void) dlpi_disabnotify(pi
->pi_dh
, id
, NULL
);
233 pi
->pi_notes
|= notes
;
237 * Enable notification of hardware address changes to keep pi_hwaddr
238 * up-to-date and track if we need to offline/undo-offline phyints.
240 notes
= DL_NOTE_PHYS_ADDR
;
241 retval
= dlpi_enabnotify(pi
->pi_dh
, notes
, phyint_link_notify
, pi
, &id
);
242 if (retval
== DLPI_SUCCESS
&& poll_add(dlpi_fd(pi
->pi_dh
)) == 0)
243 pi
->pi_notes
|= notes
;
247 logerr("%s: %s: %s\n", pi
->pi_name
, errmsg
, dlpi_strerror(retval
));
248 if (pi
->pi_dh
!= NULL
) {
249 dlpi_close(pi
->pi_dh
);
256 * Close use of link on `pi'.
259 phyint_link_close(struct phyint
*pi
)
261 if (pi
->pi_notes
& DL_NOTE_PHYS_ADDR
) {
262 (void) poll_remove(dlpi_fd(pi
->pi_dh
));
263 pi
->pi_notes
&= ~DL_NOTE_PHYS_ADDR
;
267 * NOTE: we don't clear pi_notes here so that iflinkstate() can still
268 * properly report the link state even when offline (which is possible
269 * since we use IFF_RUNNING to track link state).
271 dlpi_close(pi
->pi_dh
);
275 /* Return the phyint instance with the given name and the given family */
276 struct phyint_instance
*
277 phyint_inst_lookup(int af
, char *name
)
281 if (debug
& D_PHYINT
)
282 logdebug("phyint_inst_lookup(%s %s)\n", AF_STR(af
), name
);
284 assert(af
== AF_INET
|| af
== AF_INET6
);
286 pi
= phyint_lookup(name
);
290 return (PHYINT_INSTANCE(pi
, af
));
293 struct phyint_group
*
294 phyint_group_lookup(const char *pg_name
)
296 struct phyint_group
*pg
;
298 if (debug
& D_PHYINT
)
299 logdebug("phyint_group_lookup(%s)\n", pg_name
);
301 for (pg
= phyint_groups
; pg
!= NULL
; pg
= pg
->pg_next
) {
302 if (strncmp(pg
->pg_name
, pg_name
, sizeof (pg
->pg_name
)) == 0)
309 * Insert the phyint in the linked list of all phyints. If the phyint belongs
310 * to some group, insert it in the phyint group list.
313 phyint_insert(struct phyint
*pi
, struct phyint_group
*pg
)
315 if (debug
& D_PHYINT
)
316 logdebug("phyint_insert(%s '%s')\n", pi
->pi_name
, pg
->pg_name
);
318 /* Insert the phyint at the head of the 'all phyints' list */
319 pi
->pi_next
= phyints
;
322 phyints
->pi_prev
= pi
;
326 * Insert the phyint at the head of the 'phyint_group members' list
327 * of the phyint group to which it belongs.
329 pi
->pi_pgnext
= NULL
;
330 pi
->pi_pgprev
= NULL
;
333 pi
->pi_pgnext
= pg
->pg_phyint
;
334 if (pi
->pi_pgnext
!= NULL
)
335 pi
->pi_pgnext
->pi_pgprev
= pi
;
338 /* Refresh the group state now that this phyint has been added */
339 phyint_group_refresh_state(pg
);
342 (void) phyint_group_member_event(pg
, pi
, IPMP_IF_ADD
);
345 /* Insert the phyint instance in the linked list of all phyint instances. */
347 phyint_inst_insert(struct phyint_instance
*pii
)
349 if (debug
& D_PHYINT
) {
350 logdebug("phyint_inst_insert(%s %s)\n",
351 AF_STR(pii
->pii_af
), pii
->pii_name
);
355 * Insert the phyint at the head of the 'all phyint instances' list.
357 pii
->pii_next
= phyint_instances
;
358 pii
->pii_prev
= NULL
;
359 if (phyint_instances
!= NULL
)
360 phyint_instances
->pii_prev
= pii
;
361 phyint_instances
= pii
;
365 * Create a new phyint with the given parameters. Also insert it into
366 * the list of all phyints and the list of phyint group members by calling
369 static struct phyint
*
370 phyint_create(char *pi_name
, struct phyint_group
*pg
, uint_t ifindex
,
375 pi
= calloc(1, sizeof (struct phyint
));
377 logperror("phyint_create: calloc");
382 * Record the phyint values.
384 (void) strlcpy(pi
->pi_name
, pi_name
, sizeof (pi
->pi_name
));
385 pi
->pi_taddrthresh
= getcurrentsec() + TESTADDR_CONF_TIME
;
386 pi
->pi_ifindex
= ifindex
;
387 pi
->pi_icmpid
= htons(((getpid() & 0xFF) << 8) | (ifindex
& 0xFF));
389 pi
->pi_state
= PI_INIT
;
390 pi
->pi_flags
= PHYINT_FLAGS(flags
);
393 * Initialize the link state. The link state is initialized to
394 * up, so that if the link is down when IPMP starts monitoring
395 * the interface, it will appear as though there has been a
396 * transition from the link up to link down. This avoids
397 * having to treat this situation as a special case.
401 if (!phyint_link_init(pi
)) {
407 * Insert the phyint in the list of all phyints, and the
408 * list of phyint group members
410 phyint_insert(pi
, pg
);
416 * Create a new phyint instance belonging to the phyint 'pi' and address
417 * family 'af'. Also insert it into the list of all phyint instances by
418 * calling phyint_inst_insert().
420 static struct phyint_instance
*
421 phyint_inst_create(struct phyint
*pi
, int af
)
423 struct phyint_instance
*pii
;
425 pii
= calloc(1, sizeof (struct phyint_instance
));
427 logperror("phyint_inst_create: calloc");
432 * Attach the phyint instance to the phyint.
433 * Set the back pointers as well
435 pii
->pii_phyint
= pi
;
442 pii
->pii_probe_sock
= -1;
445 pii
->pii_fd_hrtime
= gethrtime() +
446 (FAILURE_DETECTION_QP
* (hrtime_t
)NANOSEC
);
447 pii
->pii_flags
= pi
->pi_flags
;
449 /* Insert the phyint instance in the list of all phyint instances. */
450 phyint_inst_insert(pii
);
455 * Change the state of phyint `pi' to state `state'.
458 phyint_chstate(struct phyint
*pi
, enum pi_state state
)
461 * To simplify things, some callers always set a given state
462 * regardless of the previous state of the phyint (e.g., setting
463 * PI_RUNNING when it's already set). We shouldn't bother
464 * generating an event or consuming a signature for these, since
465 * the actual state of the interface is unchanged.
467 if (pi
->pi_state
== state
)
470 pi
->pi_state
= state
;
475 * Note that `pi' has changed state.
478 phyint_changed(struct phyint
*pi
)
480 pi
->pi_group
->pg_sig
++;
481 (void) phyint_state_event(pi
->pi_group
, pi
);
485 * Insert the phyint group in the linked list of all phyint groups
486 * at the head of the list
489 phyint_group_insert(struct phyint_group
*pg
)
491 pg
->pg_next
= phyint_groups
;
493 if (phyint_groups
!= NULL
)
494 phyint_groups
->pg_prev
= pg
;
497 phyint_grouplistsig
++;
498 (void) phyint_group_change_event(pg
, IPMP_GROUP_ADD
);
502 * Create a new phyint group called 'name'.
504 struct phyint_group
*
505 phyint_group_create(const char *name
)
507 struct phyint_group
*pg
;
509 if (debug
& D_PHYINT
)
510 logdebug("phyint_group_create(%s)\n", name
);
512 pg
= calloc(1, sizeof (struct phyint_group
));
514 logperror("phyint_group_create: calloc");
518 (void) strlcpy(pg
->pg_name
, name
, sizeof (pg
->pg_name
));
519 pg
->pg_sig
= gensig();
520 pg
->pg_fdt
= user_failure_detection_time
;
521 pg
->pg_probeint
= user_probe_interval
;
522 pg
->pg_in_use
= _B_TRUE
;
525 * Normal groups always start in the PG_FAILED state since they
526 * have no active interfaces. In contrast, anonymous groups are
527 * heterogeneous and thus always PG_OK.
529 pg
->pg_state
= (name
[0] == '\0' ? PG_OK
: PG_FAILED
);
535 * Change the state of the phyint group `pg' to state `state'.
538 phyint_group_chstate(struct phyint_group
*pg
, enum pg_state state
)
540 assert(pg
!= phyint_anongroup
);
543 * To simplify things, some callers always set a given state
544 * regardless of the previous state of the group (e.g., setting
545 * PG_DEGRADED when it's already set). We shouldn't bother
546 * generating an event or consuming a signature for these, since
547 * the actual state of the group is unchanged.
549 if (pg
->pg_state
== state
)
552 pg
->pg_state
= state
;
557 * We can never know with certainty that a group has
558 * failed. It is possible that all known targets have
559 * failed simultaneously, and new targets have come up
560 * instead. If the targets are routers then router
561 * discovery will kick in, and we will see the new routers
562 * thru routing socket messages. But if the targets are
563 * hosts, we have to discover it by multicast. So flush
564 * all the host targets. The next probe will send out a
565 * multicast echo request. If this is a group failure, we
566 * will still not see any response, otherwise the group
567 * will be repaired after we get NUM_PROBE_REPAIRS
568 * consecutive unicast replies on any phyint.
570 target_flush_hosts(pg
);
578 logerr("phyint_group_chstate: invalid group state %d; "
579 "aborting\n", state
);
584 (void) phyint_group_state_event(pg
);
588 * Create a new phyint instance and initialize it from the values supplied by
589 * the kernel. Always check for ENXIO before logging any error, because the
590 * interface could have vanished after completion of SIOCGLIFCONF.
592 * pointer to the phyint instance on success
593 * NULL on failure, e.g., if the phyint instance is not found in the kernel
595 struct phyint_instance
*
596 phyint_inst_init_from_k(int af
, char *pi_name
)
598 char pg_name
[LIFNAMSIZ
+ 1];
604 struct phyint_instance
*pii
;
605 boolean_t pi_created
;
606 struct phyint_group
*pg
;
612 pi_created
= _B_FALSE
;
614 if (debug
& D_PHYINT
) {
615 logdebug("phyint_inst_init_from_k(%s %s)\n",
616 AF_STR(af
), pi_name
);
619 assert(af
== AF_INET
|| af
== AF_INET6
);
621 /* Get the socket for doing ioctls */
622 ifsock
= (af
== AF_INET
) ? ifsock_v4
: ifsock_v6
;
625 * Get the interface flags. Ignore virtual interfaces, IPMP
626 * meta-interfaces, point-to-point interfaces, and interfaces
627 * that can't support multicast.
629 (void) strlcpy(lifr
.lifr_name
, pi_name
, sizeof (lifr
.lifr_name
));
630 if (ioctl(ifsock
, SIOCGLIFFLAGS
, (char *)&lifr
) < 0) {
631 if (errno
!= ENXIO
) {
632 logperror("phyint_inst_init_from_k:"
633 " ioctl (get flags)");
637 flags
= lifr
.lifr_flags
;
638 if (!(flags
& IFF_MULTICAST
) ||
639 (flags
& (IFF_VIRTUAL
|IFF_IPMP
|IFF_POINTOPOINT
)))
643 * Get the ifindex for recording later in our tables, in case we need
644 * to create a new phyint.
646 if (ioctl(ifsock
, SIOCGLIFINDEX
, (char *)&lifr
) < 0) {
647 if (errno
!= ENXIO
) {
648 logperror("phyint_inst_init_from_k: "
649 " ioctl (get lifindex)");
653 ifindex
= lifr
.lifr_index
;
656 * Get the phyint group name of this phyint, from the kernel.
658 if (ioctl(ifsock
, SIOCGLIFGROUPNAME
, (char *)&lifr
) < 0) {
659 if (errno
!= ENXIO
) {
660 logperror("phyint_inst_init_from_k: "
661 "ioctl (get group name)");
665 (void) strlcpy(pg_name
, lifr
.lifr_groupname
, sizeof (pg_name
));
668 * If the phyint is not part of any group, pg_name is the
669 * null string. If 'track_all_phyints' is false, there is no
670 * need to create a phyint.
672 if (pg_name
[0] == '\0' && !track_all_phyints
) {
674 * If the IFF_FAILED, IFF_INACTIVE, or IFF_OFFLINE flags are
675 * set, reset them. These flags shouldn't be set if in.mpathd
676 * isn't tracking the interface.
678 if ((flags
& (IFF_FAILED
| IFF_INACTIVE
| IFF_OFFLINE
))) {
679 lifr
.lifr_flags
= flags
&
680 ~(IFF_FAILED
| IFF_INACTIVE
| IFF_OFFLINE
);
681 if (ioctl(ifsock
, SIOCSLIFFLAGS
, (char *)&lifr
) < 0) {
682 if (errno
!= ENXIO
) {
683 logperror("phyint_inst_init_from_k:"
684 " ioctl (set flags)");
692 * We need to create a new phyint instance. We may also need to
693 * create the group if e.g. the SIOCGLIFCONF loop in initifs() found
694 * an underlying interface before it found its IPMP meta-interface.
695 * Note that we keep any created groups even if phyint_inst_init_from_k()
696 * fails since a group's existence is not dependent on the ability of
697 * in.mpathd to track the group's interfaces.
699 if ((pg
= phyint_group_lookup(pg_name
)) == NULL
) {
700 if ((pg
= phyint_group_create(pg_name
)) == NULL
) {
701 logerr("phyint_inst_init_from_k: cannot create group "
705 phyint_group_insert(pg
);
709 * Lookup the phyint. If the phyint does not exist create it.
711 pi
= phyint_lookup(pi_name
);
713 pi
= phyint_create(pi_name
, pg
, ifindex
, flags
);
715 logerr("phyint_inst_init_from_k:"
716 " unable to create phyint %s\n", pi_name
);
719 pi_created
= _B_TRUE
;
721 /* The phyint exists already. */
722 assert(pi_created
== _B_FALSE
);
724 * Normally we should see consistent values for the IPv4 and
725 * IPv6 instances, for phyint properties. If we don't, it
726 * means things have changed underneath us, and we should
727 * resync our tables with the kernel. Check whether the
728 * interface index has changed. If so, it is most likely
729 * the interface has been unplumbed and replumbed,
730 * while we are yet to update our tables. Do it now.
732 if (pi
->pi_ifindex
!= ifindex
) {
733 phyint_inst_delete(PHYINT_INSTANCE(pi
, AF_OTHER(af
)));
736 assert(PHYINT_INSTANCE(pi
, af
) == NULL
);
739 * If the group name seen by the IPv4 and IPv6 instances
740 * are different, it is most likely the groupname has
741 * changed, while we are yet to update our tables. Do it now.
743 if (strcmp(pi
->pi_group
->pg_name
, pg_name
) != 0) {
744 phyint_inst_delete(PHYINT_INSTANCE(pi
,
751 * Create a new phyint instance, corresponding to the 'af'
754 pii
= phyint_inst_create(pi
, af
);
756 logerr("phyint_inst_init_from_k: unable to create"
757 "phyint inst %s\n", pi
->pi_name
);
765 * NOTE: the change_pif_flags() implementation requires a phyint
766 * instance before it can function, so a number of tasks that would
767 * otherwise be done in phyint_create() are deferred to here.
771 * If the interface is offline, set the state to PI_OFFLINE.
772 * Otherwise, optimistically consider this interface running.
773 * Later (in process_link_state_changes()), we will adjust
774 * this to match the current state of the link. Further, if
775 * test addresses are subsequently assigned, we will
776 * transition to PI_NOTARGETS and then to either PI_RUNNING or
777 * PI_FAILED depending on the probe results.
779 if (pi
->pi_flags
& IFF_OFFLINE
) {
780 phyint_chstate(pi
, PI_OFFLINE
);
782 /* calls phyint_chstate() */
783 phyint_transition_to_running(pi
);
787 * If this is a standby phyint, determine whether it should be
790 if (pi
->pi_flags
& IFF_STANDBY
)
791 phyint_standby_refresh_inactive(pi
);
794 * If this phyint does not have a unique hardware address in its
797 if (phyint_lookup_hwaddr(pi
, _B_TRUE
) != NULL
) {
798 pi
->pi_hwaddrdup
= _B_TRUE
;
799 (void) phyint_offline(pi
, 0);
807 * Bind pii_probe_sock to the address associated with pii_probe_logint.
808 * This socket will be used for sending and receiving ICMP/ICMPv6 probes to
809 * targets. Do the common part in this function, and complete the
810 * initializations by calling the protocol specific functions
811 * phyint_inst_v{4,6}_sockinit() respectively.
813 * Return values: _B_TRUE/_B_FALSE for success or failure respectively.
816 phyint_inst_sockinit(struct phyint_instance
*pii
)
819 struct phyint_group
*pg
;
821 if (debug
& D_PHYINT
) {
822 logdebug("phyint_inst_sockinit(%s %s)\n",
823 AF_STR(pii
->pii_af
), pii
->pii_name
);
826 assert(pii
->pii_probe_logint
!= NULL
);
827 assert(pii
->pii_probe_logint
->li_flags
& IFF_UP
);
828 assert(pii
->pii_probe_logint
->li_flags
& IFF_NOFAILOVER
);
829 assert(pii
->pii_af
== AF_INET
|| pii
->pii_af
== AF_INET6
);
832 * If the socket is already bound, close pii_probe_sock
834 if (pii
->pii_probe_sock
!= -1)
835 close_probe_socket(pii
, _B_TRUE
);
838 * If the phyint is not part of a named group and track_all_phyints is
839 * false, simply return.
841 pg
= pii
->pii_phyint
->pi_group
;
842 if (pg
== phyint_anongroup
&& !track_all_phyints
) {
843 if (debug
& D_PHYINT
)
844 logdebug("phyint_inst_sockinit: no group\n");
849 * Initialize the socket by calling the protocol specific function.
850 * If it succeeds, add the socket to the poll list.
852 if (pii
->pii_af
== AF_INET6
)
853 success
= phyint_inst_v6_sockinit(pii
);
855 success
= phyint_inst_v4_sockinit(pii
);
857 if (success
&& (poll_add(pii
->pii_probe_sock
) == 0))
860 /* Something failed, cleanup and return false */
861 if (pii
->pii_probe_sock
!= -1)
862 close_probe_socket(pii
, _B_FALSE
);
868 * IPv6 specific part in initializing the pii_probe_sock. This socket is
869 * used to send/receive ICMPv6 probe packets.
872 phyint_inst_v6_sockinit(struct phyint_instance
*pii
)
874 icmp6_filter_t filter
;
878 struct sockaddr_in6 testaddr
;
882 * Open a raw socket with ICMPv6 protocol.
884 * Use IPV6_BOUND_IF to make sure that probes are sent and received on
885 * the specified phyint only. Bind to the test address to ensure that
886 * the responses are sent to the specified phyint.
888 * Set the hopcount to 1 so that probe packets are not routed.
889 * Disable multicast loopback. Set the receive filter to
890 * receive only ICMPv6 echo replies.
892 pii
->pii_probe_sock
= socket(pii
->pii_af
, SOCK_RAW
, IPPROTO_ICMPV6
);
893 if (pii
->pii_probe_sock
< 0) {
894 logperror_pii(pii
, "phyint_inst_v6_sockinit: socket");
899 * Probes must not block in case of lower layer issues.
901 if ((flags
= fcntl(pii
->pii_probe_sock
, F_GETFL
, 0)) == -1) {
902 logperror_pii(pii
, "phyint_inst_v6_sockinit: fcntl"
906 if (fcntl(pii
->pii_probe_sock
, F_SETFL
,
907 flags
| O_NONBLOCK
) == -1) {
908 logperror_pii(pii
, "phyint_inst_v6_sockinit: fcntl"
909 " F_SETFL O_NONBLOCK");
913 bzero(&testaddr
, sizeof (testaddr
));
914 testaddr
.sin6_family
= AF_INET6
;
915 testaddr
.sin6_port
= 0;
916 testaddr
.sin6_addr
= pii
->pii_probe_logint
->li_addr
;
918 if (bind(pii
->pii_probe_sock
, (struct sockaddr
*)&testaddr
,
919 sizeof (testaddr
)) < 0) {
920 logperror_pii(pii
, "phyint_inst_v6_sockinit: IPv6 bind");
924 if (setsockopt(pii
->pii_probe_sock
, IPPROTO_IPV6
, IPV6_MULTICAST_IF
,
925 (char *)&pii
->pii_ifindex
, sizeof (uint_t
)) < 0) {
926 logperror_pii(pii
, "phyint_inst_v6_sockinit: setsockopt"
927 " IPV6_MULTICAST_IF");
931 if (setsockopt(pii
->pii_probe_sock
, IPPROTO_IPV6
, IPV6_BOUND_IF
,
932 &pii
->pii_ifindex
, sizeof (uint_t
)) < 0) {
933 logperror_pii(pii
, "phyint_inst_v6_sockinit: setsockopt"
938 if (setsockopt(pii
->pii_probe_sock
, IPPROTO_IPV6
, IPV6_UNICAST_HOPS
,
939 (char *)&hopcount
, sizeof (hopcount
)) < 0) {
940 logperror_pii(pii
, "phyint_inst_v6_sockinit: setsockopt"
941 " IPV6_UNICAST_HOPS");
945 if (setsockopt(pii
->pii_probe_sock
, IPPROTO_IPV6
, IPV6_MULTICAST_HOPS
,
946 (char *)&hopcount
, sizeof (hopcount
)) < 0) {
947 logperror_pii(pii
, "phyint_inst_v6_sockinit: setsockopt"
948 " IPV6_MULTICAST_HOPS");
952 if (setsockopt(pii
->pii_probe_sock
, IPPROTO_IPV6
, IPV6_MULTICAST_LOOP
,
953 (char *)&off
, sizeof (off
)) < 0) {
954 logperror_pii(pii
, "phyint_inst_v6_sockinit: setsockopt"
955 " IPV6_MULTICAST_LOOP");
960 * Set the filter so that we only receive ICMPv6 echo replies
962 ICMP6_FILTER_SETBLOCKALL(&filter
);
963 ICMP6_FILTER_SETPASS(ICMP6_ECHO_REPLY
, &filter
);
965 if (setsockopt(pii
->pii_probe_sock
, IPPROTO_ICMPV6
, ICMP6_FILTER
,
966 (char *)&filter
, sizeof (filter
)) < 0) {
967 logperror_pii(pii
, "phyint_inst_v6_sockinit: setsockopt"
972 /* Enable receipt of hoplimit */
973 if (setsockopt(pii
->pii_probe_sock
, IPPROTO_IPV6
, IPV6_RECVHOPLIMIT
,
974 &on
, sizeof (on
)) < 0) {
975 logperror_pii(pii
, "phyint_inst_v6_sockinit: setsockopt"
976 " IPV6_RECVHOPLIMIT");
980 /* Enable receipt of timestamp */
981 if (setsockopt(pii
->pii_probe_sock
, SOL_SOCKET
, SO_TIMESTAMP
,
982 &on
, sizeof (on
)) < 0) {
983 logperror_pii(pii
, "phyint_inst_v6_sockinit: setsockopt"
992 * IPv4 specific part in initializing the pii_probe_sock. This socket is
993 * used to send/receive ICMPv4 probe packets.
996 phyint_inst_v4_sockinit(struct phyint_instance
*pii
)
998 struct sockaddr_in testaddr
;
1006 * Open a raw socket with ICMPv4 protocol.
1008 * Use IP_BOUND_IF to make sure that probes are sent and received on
1009 * the specified phyint only. Bind to the test address to ensure that
1010 * the responses are sent to the specified phyint.
1012 * Set the ttl to 1 so that probe packets are not routed.
1013 * Disable multicast loopback. Enable receipt of timestamp.
1015 pii
->pii_probe_sock
= socket(pii
->pii_af
, SOCK_RAW
, IPPROTO_ICMP
);
1016 if (pii
->pii_probe_sock
< 0) {
1017 logperror_pii(pii
, "phyint_inst_v4_sockinit: socket");
1022 * Probes must not block in case of lower layer issues.
1024 if ((flags
= fcntl(pii
->pii_probe_sock
, F_GETFL
, 0)) == -1) {
1025 logperror_pii(pii
, "phyint_inst_v4_sockinit: fcntl"
1029 if (fcntl(pii
->pii_probe_sock
, F_SETFL
,
1030 flags
| O_NONBLOCK
) == -1) {
1031 logperror_pii(pii
, "phyint_inst_v4_sockinit: fcntl"
1032 " F_SETFL O_NONBLOCK");
1036 bzero(&testaddr
, sizeof (testaddr
));
1037 testaddr
.sin_family
= AF_INET
;
1038 testaddr
.sin_port
= 0;
1039 IN6_V4MAPPED_TO_INADDR(&pii
->pii_probe_logint
->li_addr
,
1040 &testaddr
.sin_addr
);
1042 if (bind(pii
->pii_probe_sock
, (struct sockaddr
*)&testaddr
,
1043 sizeof (testaddr
)) < 0) {
1044 logperror_pii(pii
, "phyint_inst_v4_sockinit: IPv4 bind");
1048 if (setsockopt(pii
->pii_probe_sock
, IPPROTO_IP
, IP_BOUND_IF
,
1049 &pii
->pii_ifindex
, sizeof (uint_t
)) < 0) {
1050 logperror_pii(pii
, "phyint_inst_v4_sockinit: setsockopt"
1055 if (setsockopt(pii
->pii_probe_sock
, IPPROTO_IP
, IP_MULTICAST_IF
,
1056 (char *)&testaddr
.sin_addr
, sizeof (struct in_addr
)) < 0) {
1057 logperror_pii(pii
, "phyint_inst_v4_sockinit: setsockopt"
1058 " IP_MULTICAST_IF");
1062 if (setsockopt(pii
->pii_probe_sock
, IPPROTO_IP
, IP_TTL
,
1063 (char *)&ttl
, sizeof (ttl
)) < 0) {
1064 logperror_pii(pii
, "phyint_inst_v4_sockinit: setsockopt"
1069 if (setsockopt(pii
->pii_probe_sock
, IPPROTO_IP
, IP_MULTICAST_LOOP
,
1070 (char *)&char_off
, sizeof (char_off
)) == -1) {
1071 logperror_pii(pii
, "phyint_inst_v4_sockinit: setsockopt"
1072 " IP_MULTICAST_LOOP");
1076 if (setsockopt(pii
->pii_probe_sock
, IPPROTO_IP
, IP_MULTICAST_TTL
,
1077 (char *)&char_ttl
, sizeof (char_ttl
)) == -1) {
1078 logperror_pii(pii
, "phyint_inst_v4_sockinit: setsockopt"
1079 " IP_MULTICAST_TTL");
1083 if (setsockopt(pii
->pii_probe_sock
, SOL_SOCKET
, SO_TIMESTAMP
, &on
,
1085 logperror_pii(pii
, "phyint_inst_v4_sockinit: setsockopt"
1094 * Remove the phyint group from the list of 'all phyint groups'
1098 phyint_group_delete(struct phyint_group
*pg
)
1101 * The anonymous group always exists, even when empty.
1103 if (pg
== phyint_anongroup
)
1106 if (debug
& D_PHYINT
)
1107 logdebug("phyint_group_delete('%s')\n", pg
->pg_name
);
1110 * The phyint group must be empty, and must not have any phyints.
1111 * The phyint group must be in the list of all phyint groups
1113 assert(pg
->pg_phyint
== NULL
);
1114 assert(phyint_groups
== pg
|| pg
->pg_prev
!= NULL
);
1116 if (pg
->pg_prev
!= NULL
)
1117 pg
->pg_prev
->pg_next
= pg
->pg_next
;
1119 phyint_groups
= pg
->pg_next
;
1121 if (pg
->pg_next
!= NULL
)
1122 pg
->pg_next
->pg_prev
= pg
->pg_prev
;
1127 phyint_grouplistsig
++;
1128 (void) phyint_group_change_event(pg
, IPMP_GROUP_REMOVE
);
1130 addrlist_free(&pg
->pg_addrs
);
1135 * Refresh the state of `pg' based on its current members.
1138 phyint_group_refresh_state(struct phyint_group
*pg
)
1140 enum pg_state state
;
1141 enum pg_state origstate
= pg
->pg_state
;
1142 struct phyint
*pi
, *usablepi
;
1143 uint_t nif
= 0, nusable
= 0;
1146 * Anonymous groups never change state.
1148 if (pg
== phyint_anongroup
)
1151 for (pi
= pg
->pg_phyint
; pi
!= NULL
; pi
= pi
->pi_pgnext
) {
1153 if (phyint_is_usable(pi
)) {
1161 else if (nif
== nusable
)
1164 state
= PG_DEGRADED
;
1166 phyint_group_chstate(pg
, state
);
1169 * If we're shutting down, skip logging messages since otherwise our
1170 * shutdown housecleaning will make us report that groups are unusable.
1172 if (cleanup_started
)
1176 * NOTE: We use pg_failmsg_printed rather than origstate since
1177 * otherwise at startup we'll log a "now usable" message when the
1178 * first usable phyint is added to an empty group.
1180 if (state
!= PG_FAILED
&& pg
->pg_failmsg_printed
) {
1181 assert(origstate
== PG_FAILED
);
1182 logerr("At least 1 IP interface (%s) in group %s is now "
1183 "usable\n", usablepi
->pi_name
, pg
->pg_name
);
1184 pg
->pg_failmsg_printed
= _B_FALSE
;
1185 } else if (origstate
!= PG_FAILED
&& state
== PG_FAILED
) {
1186 logerr("All IP interfaces in group %s are now unusable\n",
1188 pg
->pg_failmsg_printed
= _B_TRUE
;
1193 * Extract information from the kernel about the desired phyint.
1194 * Look only for properties of the phyint and not properties of logints.
1195 * Take appropriate action on the changes.
1198 * The phyint exists in the kernel and matches our knowledge
1201 * The phyint has vanished in the kernel.
1202 * PI_IFINDEX_CHANGED
1203 * The phyint's interface index has changed.
1204 * Ask the caller to delete and recreate the phyint.
1206 * Some ioctl error. Don't change anything.
1208 * The phyint has changed group.
1211 phyint_inst_update_from_k(struct phyint_instance
*pii
)
1217 pi
= pii
->pii_phyint
;
1219 if (debug
& D_PHYINT
) {
1220 logdebug("phyint_inst_update_from_k(%s %s)\n",
1221 AF_STR(pii
->pii_af
), pi
->pi_name
);
1225 * Get the ifindex from the kernel, for comparison with the
1226 * value in our tables.
1228 (void) strncpy(lifr
.lifr_name
, pi
->pi_name
, sizeof (lifr
.lifr_name
));
1229 lifr
.lifr_name
[sizeof (lifr
.lifr_name
) - 1] = '\0';
1231 ifsock
= (pii
->pii_af
== AF_INET
) ? ifsock_v4
: ifsock_v6
;
1232 if (ioctl(ifsock
, SIOCGLIFINDEX
, &lifr
) < 0) {
1233 if (errno
== ENXIO
) {
1234 return (PI_DELETED
);
1236 logperror_pii(pii
, "phyint_inst_update_from_k:"
1237 " ioctl (get lifindex)");
1238 return (PI_IOCTL_ERROR
);
1242 if (lifr
.lifr_index
!= pi
->pi_ifindex
) {
1244 * The index has changed. Most likely the interface has
1245 * been unplumbed and replumbed. Ask the caller to take
1246 * appropriate action.
1248 if (debug
& D_PHYINT
) {
1249 logdebug("phyint_inst_update_from_k:"
1250 " old index %d new index %d\n",
1251 pi
->pi_ifindex
, lifr
.lifr_index
);
1253 return (PI_IFINDEX_CHANGED
);
1257 * Get the group name from the kernel, for comparison with
1258 * the value in our tables.
1260 if (ioctl(ifsock
, SIOCGLIFGROUPNAME
, &lifr
) < 0) {
1261 if (errno
== ENXIO
) {
1262 return (PI_DELETED
);
1264 logperror_pii(pii
, "phyint_inst_update_from_k:"
1265 " ioctl (get groupname)");
1266 return (PI_IOCTL_ERROR
);
1271 * If the phyint has changed group i.e. if the phyint group name
1272 * returned by the kernel is different, ask the caller to delete
1273 * and recreate the phyint in the right group
1275 if (strcmp(lifr
.lifr_groupname
, pi
->pi_group
->pg_name
) != 0) {
1276 /* Groupname has changed */
1277 if (debug
& D_PHYINT
) {
1278 logdebug("phyint_inst_update_from_k:"
1279 " groupname change\n");
1281 return (PI_GROUP_CHANGED
);
1285 * Get the current phyint flags from the kernel, and determine what
1286 * flags have changed by comparing against our tables. Note that the
1287 * IFF_INACTIVE processing in initifs() relies on this call to ensure
1288 * that IFF_INACTIVE is really still set on the interface.
1290 if (ioctl(ifsock
, SIOCGLIFFLAGS
, &lifr
) < 0) {
1291 if (errno
== ENXIO
) {
1292 return (PI_DELETED
);
1294 logperror_pii(pii
, "phyint_inst_update_from_k: "
1295 " ioctl (get flags)");
1296 return (PI_IOCTL_ERROR
);
1300 pi
->pi_flags
= PHYINT_FLAGS(lifr
.lifr_flags
);
1301 if (pi
->pi_v4
!= NULL
)
1302 pi
->pi_v4
->pii_flags
= pi
->pi_flags
;
1303 if (pi
->pi_v6
!= NULL
)
1304 pi
->pi_v6
->pii_flags
= pi
->pi_flags
;
1307 * Make sure the IFF_FAILED flag is set if and only if we think
1308 * the interface should be failed.
1310 if (pi
->pi_flags
& IFF_FAILED
) {
1311 if (pi
->pi_state
== PI_RUNNING
)
1312 (void) change_pif_flags(pi
, 0, IFF_FAILED
);
1314 if (pi
->pi_state
== PI_FAILED
)
1315 (void) change_pif_flags(pi
, IFF_FAILED
, IFF_INACTIVE
);
1318 /* No change in phyint status */
1323 * Delete the phyint. Remove it from the list of all phyints, and the
1324 * list of phyint group members.
1327 phyint_delete(struct phyint
*pi
)
1331 struct phyint_group
*pg
= pi
->pi_group
;
1333 if (debug
& D_PHYINT
)
1334 logdebug("phyint_delete(%s)\n", pi
->pi_name
);
1336 /* Both IPv4 and IPv6 phyint instances must have been deleted. */
1337 assert(pi
->pi_v4
== NULL
&& pi
->pi_v6
== NULL
);
1340 * The phyint must belong to a group.
1342 assert(pg
->pg_phyint
== pi
|| pi
->pi_pgprev
!= NULL
);
1344 /* The phyint must be in the list of all phyints */
1345 assert(phyints
== pi
|| pi
->pi_prev
!= NULL
);
1347 /* Remove the phyint from the phyint group list */
1349 (void) phyint_group_member_event(pg
, pi
, IPMP_IF_REMOVE
);
1351 if (pi
->pi_pgprev
== NULL
) {
1352 /* Phyint is the 1st in the phyint group list */
1353 pg
->pg_phyint
= pi
->pi_pgnext
;
1355 pi
->pi_pgprev
->pi_pgnext
= pi
->pi_pgnext
;
1357 if (pi
->pi_pgnext
!= NULL
)
1358 pi
->pi_pgnext
->pi_pgprev
= pi
->pi_pgprev
;
1359 pi
->pi_pgnext
= NULL
;
1360 pi
->pi_pgprev
= NULL
;
1362 /* Refresh the group state now that this phyint has been removed */
1363 phyint_group_refresh_state(pg
);
1365 /* Remove the phyint from the global list of phyints */
1366 if (pi
->pi_prev
== NULL
) {
1367 /* Phyint is the 1st in the list */
1368 phyints
= pi
->pi_next
;
1370 pi
->pi_prev
->pi_next
= pi
->pi_next
;
1372 if (pi
->pi_next
!= NULL
)
1373 pi
->pi_next
->pi_prev
= pi
->pi_prev
;
1378 * See if another phyint in the group had been offlined because
1379 * it was a dup of `pi' -- and if so, online it.
1381 if (!pi
->pi_hwaddrdup
&&
1382 (pi2
= phyint_lookup_hwaddr(pi
, _B_FALSE
)) != NULL
) {
1383 assert(pi2
->pi_hwaddrdup
);
1384 (void) phyint_undo_offline(pi2
);
1388 * If the interface was in a named group and was either an active
1389 * standby or the last active interface, try to activate another
1390 * interface to compensate.
1392 if (pg
!= phyint_anongroup
) {
1394 for (pi2
= pg
->pg_phyint
; pi2
!= NULL
; pi2
= pi2
->pi_pgnext
) {
1395 if (phyint_is_functioning(pi2
) &&
1396 !(pi2
->pi_flags
& IFF_INACTIVE
)) {
1403 (pi
->pi_flags
& (IFF_STANDBY
|IFF_INACTIVE
)) == IFF_STANDBY
)
1404 phyint_activate_another(pi
);
1407 phyint_link_close(pi
);
1412 * Offline phyint `pi' if at least `minred' usable interfaces remain in the
1413 * group. Returns an IPMP error code.
1416 phyint_offline(struct phyint
*pi
, uint_t minred
)
1418 boolean_t was_active
;
1419 unsigned int nusable
= 0;
1421 struct phyint_group
*pg
= pi
->pi_group
;
1424 * Verify that enough usable interfaces in the group would remain.
1425 * As a special case, if the group has failed, allow any non-offline
1426 * phyints to be offlined.
1428 if (pg
!= phyint_anongroup
) {
1429 for (pi2
= pg
->pg_phyint
; pi2
!= NULL
; pi2
= pi2
->pi_pgnext
) {
1432 if (phyint_is_usable(pi2
) ||
1433 (GROUP_FAILED(pg
) && pi2
->pi_state
!= PI_OFFLINE
))
1437 if (nusable
< minred
)
1438 return (IPMP_EMINRED
);
1440 was_active
= ((pi
->pi_flags
& IFF_INACTIVE
) == 0);
1442 if (!change_pif_flags(pi
, IFF_OFFLINE
, IFF_INACTIVE
))
1443 return (IPMP_FAILURE
);
1446 * The interface is now offline, so stop probing it. Note that
1447 * if_mpadm(1M) will down the test addresses, after receiving a
1448 * success reply from us. The routing socket message will then make us
1449 * close the socket used for sending probes. But it is more logical
1450 * that an offlined interface must not be probed, even if it has test
1453 * NOTE: stop_probing() also sets PI_OFFLINE.
1458 * If we're offlining the phyint because it has a duplicate hardware
1459 * address, print a warning -- and leave the link open so that we can
1460 * be notified of hardware address changes that make it usable again.
1461 * Otherwise, close the link so that we won't prevent a detach.
1463 if (pi
->pi_hwaddrdup
) {
1464 logerr("IP interface %s has a hardware address which is not "
1465 "unique in group %s; offlining\n", pi
->pi_name
,
1468 phyint_link_close(pi
);
1472 * If this phyint was preventing another phyint with a duplicate
1473 * hardware address from being online, bring that one online now.
1475 if (!pi
->pi_hwaddrdup
&&
1476 (pi2
= phyint_lookup_hwaddr(pi
, _B_FALSE
)) != NULL
) {
1477 assert(pi2
->pi_hwaddrdup
);
1478 (void) phyint_undo_offline(pi2
);
1482 * If this interface was active, try to activate another INACTIVE
1483 * interface in the group.
1486 phyint_activate_another(pi
);
1488 return (IPMP_SUCCESS
);
1492 * Undo a previous offline of `pi'. Returns an IPMP error code.
1495 phyint_undo_offline(struct phyint
*pi
)
1497 if (pi
->pi_state
!= PI_OFFLINE
) {
1499 return (IPMP_FAILURE
);
1503 * If necessary, reinitialize our link information and verify that its
1504 * hardware address is still unique across the group.
1506 if (pi
->pi_dh
== NULL
&& !phyint_link_init(pi
)) {
1508 return (IPMP_FAILURE
);
1511 if (phyint_lookup_hwaddr(pi
, _B_TRUE
) != NULL
) {
1512 pi
->pi_hwaddrdup
= _B_TRUE
;
1513 return (IPMP_EHWADDRDUP
);
1516 if (pi
->pi_hwaddrdup
) {
1517 logerr("IP interface %s now has a unique hardware address in "
1518 "group %s; onlining\n", pi
->pi_name
, pi
->pi_group
->pg_name
);
1519 pi
->pi_hwaddrdup
= _B_FALSE
;
1522 if (!change_pif_flags(pi
, 0, IFF_OFFLINE
))
1523 return (IPMP_FAILURE
);
1526 * While the interface was offline, it may have failed (e.g. the link
1527 * may have gone down). phyint_inst_check_for_failure() will have
1528 * already set pi_flags with IFF_FAILED, so we can use that to decide
1529 * whether the phyint should transition to running. Note that after
1530 * we transition to running, we will start sending probes again (if
1531 * test addresses are configured), which may also reveal that the
1532 * interface is in fact failed.
1534 if (pi
->pi_flags
& IFF_FAILED
) {
1535 phyint_chstate(pi
, PI_FAILED
);
1537 /* calls phyint_chstate() */
1538 phyint_transition_to_running(pi
);
1542 * Give the requestor time to configure test addresses before
1543 * complaining that they're missing.
1545 pi
->pi_taddrthresh
= getcurrentsec() + TESTADDR_CONF_TIME
;
1547 return (IPMP_SUCCESS
);
1551 * Delete (unlink and free), the phyint instance.
1554 phyint_inst_delete(struct phyint_instance
*pii
)
1556 struct phyint
*pi
= pii
->pii_phyint
;
1560 if (debug
& D_PHYINT
) {
1561 logdebug("phyint_inst_delete(%s %s)\n",
1562 AF_STR(pii
->pii_af
), pi
->pi_name
);
1566 * If the phyint instance has associated probe targets
1567 * delete all the targets
1569 while (pii
->pii_targets
!= NULL
)
1570 target_delete(pii
->pii_targets
);
1573 * Delete all the logints associated with this phyint
1576 while (pii
->pii_logint
!= NULL
)
1577 logint_delete(pii
->pii_logint
);
1580 * Close the socket used to send probes to targets from this phyint.
1582 if (pii
->pii_probe_sock
!= -1)
1583 close_probe_socket(pii
, _B_TRUE
);
1586 * Phyint instance must be in the list of all phyint instances.
1587 * Remove phyint instance from the global list of phyint instances.
1589 assert(phyint_instances
== pii
|| pii
->pii_prev
!= NULL
);
1590 if (pii
->pii_prev
== NULL
) {
1591 /* Phyint is the 1st in the list */
1592 phyint_instances
= pii
->pii_next
;
1594 pii
->pii_prev
->pii_next
= pii
->pii_next
;
1596 if (pii
->pii_next
!= NULL
)
1597 pii
->pii_next
->pii_prev
= pii
->pii_prev
;
1598 pii
->pii_next
= NULL
;
1599 pii
->pii_prev
= NULL
;
1602 * Reset the phyint instance pointer in the phyint.
1603 * If this is the last phyint instance (being deleted) on this
1604 * phyint, then delete the phyint.
1606 if (pii
->pii_af
== AF_INET
)
1611 if (pi
->pi_v4
== NULL
&& pi
->pi_v6
== NULL
)
1618 phyint_inst_print(struct phyint_instance
*pii
)
1622 char abuf
[INET6_ADDRSTRLEN
];
1626 if (pii
->pii_phyint
== NULL
) {
1627 logdebug("pii->pi_phyint NULL can't print\n");
1631 logdebug("\nPhyint instance: %s %s index %u state %x flags %llx "
1632 "sock %x in_use %d\n",
1633 AF_STR(pii
->pii_af
), pii
->pii_name
, pii
->pii_ifindex
,
1634 pii
->pii_state
, pii
->pii_phyint
->pi_flags
, pii
->pii_probe_sock
,
1637 for (li
= pii
->pii_logint
; li
!= NULL
; li
= li
->li_next
)
1641 for (tg
= pii
->pii_targets
; tg
!= NULL
; tg
= tg
->tg_next
)
1644 if (pii
->pii_targets
== NULL
)
1645 logdebug("pi_targets NULL\n");
1647 if (pii
->pii_target_next
!= NULL
) {
1648 logdebug("pi_target_next %s %s\n", AF_STR(pii
->pii_af
),
1649 pr_addr(pii
->pii_af
, pii
->pii_target_next
->tg_address
,
1650 abuf
, sizeof (abuf
)));
1652 logdebug("pi_target_next NULL\n");
1655 if (pii
->pii_rtt_target_next
!= NULL
) {
1656 logdebug("pi_rtt_target_next %s %s\n", AF_STR(pii
->pii_af
),
1657 pr_addr(pii
->pii_af
, pii
->pii_rtt_target_next
->tg_address
,
1658 abuf
, sizeof (abuf
)));
1660 logdebug("pi_rtt_target_next NULL\n");
1663 if (pii
->pii_targets
!= NULL
) {
1664 most_recent
= PROBE_INDEX_PREV(pii
->pii_probe_next
);
1668 if (pii
->pii_probes
[i
].pr_target
!= NULL
) {
1669 logdebug("#%d target %s ", i
,
1670 pr_addr(pii
->pii_af
,
1671 pii
->pii_probes
[i
].pr_target
->tg_address
,
1672 abuf
, sizeof (abuf
)));
1674 logdebug("#%d target NULL ", i
);
1676 logdebug("time_start %lld status %d "
1677 "time_ackproc %lld time_lost %u",
1678 pii
->pii_probes
[i
].pr_hrtime_start
,
1679 pii
->pii_probes
[i
].pr_status
,
1680 pii
->pii_probes
[i
].pr_hrtime_ackproc
,
1681 pii
->pii_probes
[i
].pr_time_lost
);
1682 i
= PROBE_INDEX_PREV(i
);
1683 } while (i
!= most_recent
);
1688 * Lookup a logint based on the logical interface name, on the given
1691 static struct logint
*
1692 logint_lookup(struct phyint_instance
*pii
, char *name
)
1696 if (debug
& D_LOGINT
) {
1697 logdebug("logint_lookup(%s, %s)\n",
1698 AF_STR(pii
->pii_af
), name
);
1701 for (li
= pii
->pii_logint
; li
!= NULL
; li
= li
->li_next
) {
1702 if (strncmp(name
, li
->li_name
, sizeof (li
->li_name
)) == 0)
1709 * Insert a logint at the head of the list of logints of the given
1713 logint_insert(struct phyint_instance
*pii
, struct logint
*li
)
1715 li
->li_next
= pii
->pii_logint
;
1717 if (pii
->pii_logint
!= NULL
)
1718 pii
->pii_logint
->li_prev
= li
;
1719 pii
->pii_logint
= li
;
1720 li
->li_phyint_inst
= pii
;
1724 * Create a new named logint, on the specified phyint instance.
1726 static struct logint
*
1727 logint_create(struct phyint_instance
*pii
, char *name
)
1731 if (debug
& D_LOGINT
) {
1732 logdebug("logint_create(%s %s %s)\n",
1733 AF_STR(pii
->pii_af
), pii
->pii_name
, name
);
1736 li
= calloc(1, sizeof (struct logint
));
1738 logperror("logint_create: calloc");
1742 (void) strncpy(li
->li_name
, name
, sizeof (li
->li_name
));
1743 li
->li_name
[sizeof (li
->li_name
) - 1] = '\0';
1744 logint_insert(pii
, li
);
1749 * Initialize the logint based on the data returned by the kernel.
1752 logint_init_from_k(struct phyint_instance
*pii
, char *li_name
)
1756 uint64_t saved_flags
;
1759 struct in6_addr test_subnet
;
1760 struct in6_addr testaddr
;
1761 int test_subnet_len
;
1762 struct sockaddr_in6
*sin6
;
1763 struct sockaddr_in
*sin
;
1764 char abuf
[INET6_ADDRSTRLEN
];
1765 boolean_t ptp
= _B_FALSE
;
1766 struct in6_addr tgaddr
;
1768 if (debug
& D_LOGINT
) {
1769 logdebug("logint_init_from_k(%s %s)\n",
1770 AF_STR(pii
->pii_af
), li_name
);
1773 /* Get the socket for doing ioctls */
1774 ifsock
= (pii
->pii_af
== AF_INET
) ? ifsock_v4
: ifsock_v6
;
1777 * Get the flags from the kernel. Also serves as a check whether
1778 * the logical still exists. If it doesn't exist, no need to proceed
1779 * any further. li_in_use will make the caller clean up the logint
1781 (void) strncpy(lifr
.lifr_name
, li_name
, sizeof (lifr
.lifr_name
));
1782 lifr
.lifr_name
[sizeof (lifr
.lifr_name
) - 1] = '\0';
1783 if (ioctl(ifsock
, SIOCGLIFFLAGS
, (char *)&lifr
) < 0) {
1784 /* Interface may have vanished */
1785 if (errno
!= ENXIO
) {
1786 logperror_pii(pii
, "logint_init_from_k: "
1787 "ioctl (get flags)");
1792 flags
= lifr
.lifr_flags
;
1795 * Verified the logint exists. Now lookup the logint in our tables.
1796 * If it does not exist, create a new logint.
1798 li
= logint_lookup(pii
, li_name
);
1800 li
= logint_create(pii
, li_name
);
1803 * Pretend the interface does not exist
1811 * Update li->li_flags with the new flags, after saving the old
1812 * value. This is used later to check which flags have changed and
1815 saved_flags
= li
->li_flags
;
1816 li
->li_flags
= flags
;
1819 * Get the address, prefix, prefixlength and update the logint.
1820 * Check if anything has changed. If the logint used for the
1821 * test address has changed, take suitable action.
1823 if (ioctl(ifsock
, SIOCGLIFADDR
, (char *)&lifr
) < 0) {
1824 /* Interface may have vanished */
1825 if (errno
!= ENXIO
) {
1826 logperror_li(li
, "logint_init_from_k: (get addr)");
1831 if (pii
->pii_af
== AF_INET
) {
1832 sin
= (struct sockaddr_in
*)&lifr
.lifr_addr
;
1833 IN6_INADDR_TO_V4MAPPED(&sin
->sin_addr
, &testaddr
);
1835 sin6
= (struct sockaddr_in6
*)&lifr
.lifr_addr
;
1836 testaddr
= sin6
->sin6_addr
;
1839 if (ioctl(ifsock
, SIOCGLIFSUBNET
, (char *)&lifr
) < 0) {
1840 /* Interface may have vanished */
1842 logperror_li(li
, "logint_init_from_k: (get subnet)");
1845 if (lifr
.lifr_subnet
.ss_family
== AF_INET6
) {
1846 sin6
= (struct sockaddr_in6
*)&lifr
.lifr_subnet
;
1847 test_subnet
= sin6
->sin6_addr
;
1848 test_subnet_len
= lifr
.lifr_addrlen
;
1850 sin
= (struct sockaddr_in
*)&lifr
.lifr_subnet
;
1851 IN6_INADDR_TO_V4MAPPED(&sin
->sin_addr
, &test_subnet
);
1852 test_subnet_len
= lifr
.lifr_addrlen
+ (IPV6_ABITS
- IP_ABITS
);
1856 * If this is the logint corresponding to the test address used for
1857 * sending probes, then if anything significant has changed we need to
1858 * determine the test address again. We ignore changes to the
1859 * IFF_FAILED and IFF_RUNNING flags since those happen as a matter of
1862 if (pii
->pii_probe_logint
== li
) {
1863 if (((li
->li_flags
^ saved_flags
) &
1864 ~(IFF_FAILED
| IFF_RUNNING
)) != 0 ||
1865 !IN6_ARE_ADDR_EQUAL(&testaddr
, &li
->li_addr
) ||
1866 (!ptp
&& !IN6_ARE_ADDR_EQUAL(&test_subnet
,
1868 (!ptp
&& test_subnet_len
!= li
->li_subnet_len
) ||
1869 (ptp
&& !IN6_ARE_ADDR_EQUAL(&tgaddr
, &li
->li_dstaddr
))) {
1871 * Something significant that affects the testaddress
1872 * has changed. Redo the testaddress selection later on
1873 * in select_test_ifs(). For now do the cleanup and
1874 * set pii_probe_logint to NULL.
1876 if (pii
->pii_probe_sock
!= -1)
1877 close_probe_socket(pii
, _B_TRUE
);
1878 pii
->pii_probe_logint
= NULL
;
1883 /* Update the logint with the values obtained from the kernel. */
1884 li
->li_addr
= testaddr
;
1887 li
->li_dstaddr
= tgaddr
;
1888 li
->li_subnet_len
= (pii
->pii_af
== AF_INET
) ?
1889 IP_ABITS
: IPV6_ABITS
;
1891 li
->li_subnet
= test_subnet
;
1892 li
->li_subnet_len
= test_subnet_len
;
1895 if (debug
& D_LOGINT
)
1901 logerr("logint_init_from_k: IGNORED %s %s %s addr %s\n",
1902 AF_STR(pii
->pii_af
), pii
->pii_name
, li
->li_name
,
1903 pr_addr(pii
->pii_af
, testaddr
, abuf
, sizeof (abuf
)));
1908 * Delete (unlink and free) a logint.
1911 logint_delete(struct logint
*li
)
1913 struct phyint_instance
*pii
;
1915 pii
= li
->li_phyint_inst
;
1916 assert(pii
!= NULL
);
1918 if (debug
& D_LOGINT
) {
1920 char abuf
[INET6_ADDRSTRLEN
];
1923 logdebug("logint_delete(%s %s %s/%u)\n",
1924 AF_STR(af
), li
->li_name
,
1925 pr_addr(af
, li
->li_addr
, abuf
, sizeof (abuf
)),
1929 /* logint must be in the list of logints */
1930 assert(pii
->pii_logint
== li
|| li
->li_prev
!= NULL
);
1932 /* Remove the logint from the list of logints */
1933 if (li
->li_prev
== NULL
) {
1934 /* logint is the 1st in the list */
1935 pii
->pii_logint
= li
->li_next
;
1937 li
->li_prev
->li_next
= li
->li_next
;
1939 if (li
->li_next
!= NULL
)
1940 li
->li_next
->li_prev
= li
->li_prev
;
1945 * If this logint is also being used for probing, then close the
1946 * associated socket, if it exists.
1948 if (pii
->pii_probe_logint
== li
) {
1949 if (pii
->pii_probe_sock
!= -1)
1950 close_probe_socket(pii
, _B_TRUE
);
1951 pii
->pii_probe_logint
= NULL
;
1958 logint_print(struct logint
*li
)
1960 char abuf
[INET6_ADDRSTRLEN
];
1961 int af
= li
->li_phyint_inst
->pii_af
;
1963 logdebug("logint: %s %s addr %s/%u", AF_STR(af
), li
->li_name
,
1964 pr_addr(af
, li
->li_addr
, abuf
, sizeof (abuf
)), li
->li_subnet_len
);
1966 logdebug("\tFlags: %llx in_use %d\n", li
->li_flags
, li
->li_in_use
);
1970 pr_addr(int af
, struct in6_addr addr
, char *abuf
, int len
)
1972 struct in_addr addr_v4
;
1974 if (af
== AF_INET
) {
1975 IN6_V4MAPPED_TO_INADDR(&addr
, &addr_v4
);
1976 (void) inet_ntop(AF_INET
, (void *)&addr_v4
, abuf
, len
);
1978 (void) inet_ntop(AF_INET6
, (void *)&addr
, abuf
, len
);
/*
 * Fill in the sockaddr_storage pointed to by `ssp' with the IP address
 * represented by the [`af',`addr'] pair.  in.mpathd stores every address
 * internally as an in6_addr (IPv4 addresses in IPv4-mapped form); this
 * routine converts back to a conventional sockaddr so that the internal
 * representation is not exposed.  `af' must be AF_INET or AF_INET6.
 */
void
addr2storage(int af, const struct in6_addr *addr, struct sockaddr_storage *ssp)
{
	assert(af == AF_INET || af == AF_INET6);

	if (af == AF_INET) {
		struct sockaddr_in *sinp = (struct sockaddr_in *)ssp;

		(void) memset(sinp, 0, sizeof (*sinp));
		sinp->sin_family = AF_INET;
		IN6_V4MAPPED_TO_INADDR(addr, &sinp->sin_addr);
	} else if (af == AF_INET6) {
		struct sockaddr_in6 *sin6p = (struct sockaddr_in6 *)ssp;

		(void) memset(sin6p, 0, sizeof (*sin6p));
		sin6p->sin6_family = AF_INET6;
		sin6p->sin6_addr = *addr;
	}
}
2010 /* Lookup target on its address */
2012 target_lookup(struct phyint_instance
*pii
, struct in6_addr addr
)
2016 if (debug
& D_TARGET
) {
2017 char abuf
[INET6_ADDRSTRLEN
];
2019 logdebug("target_lookup(%s %s): addr %s\n",
2020 AF_STR(pii
->pii_af
), pii
->pii_name
,
2021 pr_addr(pii
->pii_af
, addr
, abuf
, sizeof (abuf
)));
2024 for (tg
= pii
->pii_targets
; tg
!= NULL
; tg
= tg
->tg_next
) {
2025 if (IN6_ARE_ADDR_EQUAL(&tg
->tg_address
, &addr
))
2032 * Find and return the next active target, for the next probe.
2033 * If no active targets are available, return NULL.
2036 target_next(struct target
*tg
)
2038 struct phyint_instance
*pii
= tg
->tg_phyint_inst
;
2039 struct target
*marker
= tg
;
2045 * Target must be in the list of targets for this phyint
2048 assert(pii
->pii_targets
== tg
|| tg
->tg_prev
!= NULL
);
2049 assert(pii
->pii_targets
!= NULL
);
2051 /* Return the next active target */
2054 * Go to the next target. If we hit the end,
2055 * reset the ptr to the head
2059 tg
= pii
->pii_targets
;
2061 assert(TG_STATUS_VALID(tg
->tg_status
));
2063 switch (tg
->tg_status
) {
2068 assert(pii
->pii_targets_are_routers
);
2069 if (pii
->pii_ntargets
< MAX_PROBE_TARGETS
) {
2071 * Bubble up the unused target to active
2073 tg
->tg_status
= TG_ACTIVE
;
2074 pii
->pii_ntargets
++;
2080 assert(pii
->pii_targets_are_routers
);
2081 if (tg
->tg_latime
+ MIN_RECOVERY_TIME
< now
) {
2083 * Bubble up the slow target to unused
2085 tg
->tg_status
= TG_UNUSED
;
2090 assert(pii
->pii_targets_are_routers
);
2091 if (tg
->tg_latime
+ MIN_RECOVERY_TIME
< now
) {
2093 * Bubble up the dead target to slow
2095 tg
->tg_status
= TG_SLOW
;
2096 tg
->tg_latime
= now
;
2101 } while (tg
!= marker
);
2107 * Select the best available target, that is not already TG_ACTIVE,
2108 * for the caller. The caller will determine whether it wants to
2109 * make the returned target TG_ACTIVE.
2110 * The selection order is as follows.
2111 * 1. pick a TG_UNUSED target, if it exists.
2112 * 2. else pick a TG_SLOW target that has recovered, if it exists
2113 * 3. else pick any TG_SLOW target, if it exists
2114 * 4. else pick a TG_DEAD target that has recovered, if it exists
2115 * 5. else pick any TG_DEAD target, if it exists
2116 * 6. else return null
2118 static struct target
*
2119 target_select_best(struct phyint_instance
*pii
)
2122 struct target
*slow
= NULL
;
2123 struct target
*dead
= NULL
;
2124 struct target
*slow_recovered
= NULL
;
2125 struct target
*dead_recovered
= NULL
;
2130 for (tg
= pii
->pii_targets
; tg
!= NULL
; tg
= tg
->tg_next
) {
2131 assert(TG_STATUS_VALID(tg
->tg_status
));
2133 switch (tg
->tg_status
) {
2138 if (tg
->tg_latime
+ MIN_RECOVERY_TIME
< now
) {
2139 slow_recovered
= tg
;
2141 * Promote the slow_recovered to unused
2143 tg
->tg_status
= TG_UNUSED
;
2150 if (tg
->tg_latime
+ MIN_RECOVERY_TIME
< now
) {
2151 dead_recovered
= tg
;
2153 * Promote the dead_recovered to slow
2155 tg
->tg_status
= TG_SLOW
;
2156 tg
->tg_latime
= now
;
2167 if (slow_recovered
!= NULL
)
2168 return (slow_recovered
);
2169 else if (slow
!= NULL
)
2171 else if (dead_recovered
!= NULL
)
2172 return (dead_recovered
);
2178 * Some target was deleted. If we don't have even MIN_PROBE_TARGETS
2179 * that are active, pick the next best below.
2182 target_activate_all(struct phyint_instance
*pii
)
2186 assert(pii
->pii_ntargets
== 0);
2187 assert(pii
->pii_target_next
== NULL
);
2188 assert(pii
->pii_rtt_target_next
== NULL
);
2189 assert(pii
->pii_targets_are_routers
);
2191 while (pii
->pii_ntargets
< MIN_PROBE_TARGETS
) {
2192 tg
= target_select_best(pii
);
2194 /* We are out of targets */
2198 assert(TG_STATUS_VALID(tg
->tg_status
));
2199 assert(tg
->tg_status
!= TG_ACTIVE
);
2200 tg
->tg_status
= TG_ACTIVE
;
2201 pii
->pii_ntargets
++;
2202 if (pii
->pii_target_next
== NULL
) {
2203 pii
->pii_target_next
= tg
;
2204 pii
->pii_rtt_target_next
= tg
;
2209 static struct target
*
2210 target_first(struct phyint_instance
*pii
)
2214 for (tg
= pii
->pii_targets
; tg
!= NULL
; tg
= tg
->tg_next
) {
2215 assert(TG_STATUS_VALID(tg
->tg_status
));
2216 if (tg
->tg_status
== TG_ACTIVE
)
2224 * Create a default target entry.
2227 target_create(struct phyint_instance
*pii
, struct in6_addr addr
,
2228 boolean_t is_router
)
2234 if (debug
& D_TARGET
) {
2235 char abuf
[INET6_ADDRSTRLEN
];
2237 logdebug("target_create(%s %s, %s)\n",
2238 AF_STR(pii
->pii_af
), pii
->pii_name
,
2239 pr_addr(pii
->pii_af
, addr
, abuf
, sizeof (abuf
)));
2243 * If the test address is not yet initialized, do not add
2244 * any target, since we cannot determine whether the target
2245 * belongs to the same subnet as the test address.
2247 li
= pii
->pii_probe_logint
;
2252 * If there are multiple subnets associated with an interface, then
2253 * add the target to this phyint instance only if it belongs to the
2254 * same subnet as the test address. This assures us that we will
2255 * be able to reach this target through our routing table.
2257 if (!prefix_equal(li
->li_subnet
, addr
, li
->li_subnet_len
))
2260 if (pii
->pii_targets
!= NULL
) {
2261 assert(pii
->pii_ntargets
<= MAX_PROBE_TARGETS
);
2263 if (!pii
->pii_targets_are_routers
) {
2265 * Prefer router over hosts. Using hosts is a
2266 * fallback mechanism, hence delete all host
2269 while (pii
->pii_targets
!= NULL
)
2270 target_delete(pii
->pii_targets
);
2274 * Routers take precedence over hosts. If this
2275 * is a router list and we are trying to add a
2276 * host, just return. If this is a host list
2277 * and if we have sufficient targets, just return
2279 if (pii
->pii_targets_are_routers
||
2280 pii
->pii_ntargets
== MAX_PROBE_TARGETS
)
2285 tg
= calloc(1, sizeof (struct target
));
2287 logperror("target_create: calloc");
2291 tg
->tg_phyint_inst
= pii
;
2292 tg
->tg_address
= addr
;
2295 tg
->tg_num_deferred
= 0;
2298 * If this is the first target, set 'pii_targets_are_routers'
2299 * The list of targets is either a list of hosts or a list of
2300 * routers, but not a mix.
2302 if (pii
->pii_targets
== NULL
) {
2303 assert(pii
->pii_ntargets
== 0);
2304 assert(pii
->pii_target_next
== NULL
);
2305 assert(pii
->pii_rtt_target_next
== NULL
);
2306 pii
->pii_targets_are_routers
= is_router
? 1 : 0;
2309 if (pii
->pii_ntargets
== MAX_PROBE_TARGETS
) {
2310 assert(pii
->pii_targets_are_routers
);
2311 assert(pii
->pii_target_next
!= NULL
);
2312 assert(pii
->pii_rtt_target_next
!= NULL
);
2313 tg
->tg_status
= TG_UNUSED
;
2315 if (pii
->pii_ntargets
== 0) {
2316 assert(pii
->pii_target_next
== NULL
);
2317 pii
->pii_target_next
= tg
;
2318 pii
->pii_rtt_target_next
= tg
;
2320 pii
->pii_ntargets
++;
2321 tg
->tg_status
= TG_ACTIVE
;
2324 target_insert(pii
, tg
);
2327 * Change state to PI_RUNNING if this phyint instance is capable of
2328 * sending and receiving probes -- that is, if we know of at least 1
2329 * target, and this phyint instance is probe-capable. For more
2330 * details, see the phyint state diagram in mpd_probe.c.
2332 pi
= pii
->pii_phyint
;
2333 if (pi
->pi_state
== PI_NOTARGETS
&& PROBE_CAPABLE(pii
)) {
2334 if (pi
->pi_flags
& IFF_FAILED
)
2335 phyint_chstate(pi
, PI_FAILED
);
2337 phyint_chstate(pi
, PI_RUNNING
);
2342 * Add the target address named by `addr' to phyint instance `pii' if it does
2343 * not already exist. If the target is a router, `is_router' should be set to
2347 target_add(struct phyint_instance
*pii
, struct in6_addr addr
,
2348 boolean_t is_router
)
2355 tg
= target_lookup(pii
, addr
);
2358 * If the target does not exist, create it; target_create() will set
2359 * tg_in_use to true. Even if it exists already, if it's a router
2360 * target and we'd previously learned of it through multicast, then we
2361 * need to recreate it as a router target. Otherwise, just set
2362 * tg_in_use to to true so that init_router_targets() won't delete it.
2364 if (tg
== NULL
|| (is_router
&& !pii
->pii_targets_are_routers
))
2365 target_create(pii
, addr
, is_router
);
2371 * Insert target at head of linked list of targets for the associated
2375 target_insert(struct phyint_instance
*pii
, struct target
*tg
)
2377 tg
->tg_next
= pii
->pii_targets
;
2379 if (tg
->tg_next
!= NULL
)
2380 tg
->tg_next
->tg_prev
= tg
;
2381 pii
->pii_targets
= tg
;
2385 * Delete a target (unlink and free).
2388 target_delete(struct target
*tg
)
2391 struct phyint_instance
*pii
;
2392 struct phyint_instance
*pii_other
;
2394 pii
= tg
->tg_phyint_inst
;
2397 if (debug
& D_TARGET
) {
2398 char abuf
[INET6_ADDRSTRLEN
];
2400 logdebug("target_delete(%s %s, %s)\n",
2401 AF_STR(af
), pii
->pii_name
,
2402 pr_addr(af
, tg
->tg_address
, abuf
, sizeof (abuf
)));
2406 * Target must be in the list of targets for this phyint
2409 assert(pii
->pii_targets
== tg
|| tg
->tg_prev
!= NULL
);
2412 * Reset all references to 'tg' in the probe information
2415 reset_pii_probes(pii
, tg
);
2418 * Remove this target from the list of targets of this
2421 if (tg
->tg_prev
== NULL
) {
2422 pii
->pii_targets
= tg
->tg_next
;
2424 tg
->tg_prev
->tg_next
= tg
->tg_next
;
2427 if (tg
->tg_next
!= NULL
)
2428 tg
->tg_next
->tg_prev
= tg
->tg_prev
;
2433 if (tg
->tg_status
== TG_ACTIVE
)
2434 pii
->pii_ntargets
--;
2437 * Adjust the next target to probe, if it points to
2438 * to the currently deleted target.
2440 if (pii
->pii_target_next
== tg
)
2441 pii
->pii_target_next
= target_first(pii
);
2443 if (pii
->pii_rtt_target_next
== tg
)
2444 pii
->pii_rtt_target_next
= target_first(pii
);
2449 * The number of active targets pii_ntargets == 0 iff
2450 * the next active target pii->pii_target_next == NULL
2452 if (pii
->pii_ntargets
!= 0) {
2453 assert(pii
->pii_target_next
!= NULL
);
2454 assert(pii
->pii_rtt_target_next
!= NULL
);
2455 assert(pii
->pii_target_next
->tg_status
== TG_ACTIVE
);
2456 assert(pii
->pii_rtt_target_next
->tg_status
== TG_ACTIVE
);
2460 /* At this point, we don't have any active targets. */
2461 assert(pii
->pii_target_next
== NULL
);
2462 assert(pii
->pii_rtt_target_next
== NULL
);
2464 if (pii
->pii_targets_are_routers
) {
2466 * Activate any TG_SLOW or TG_DEAD router targets,
2467 * since we don't have any other targets
2469 target_activate_all(pii
);
2471 if (pii
->pii_ntargets
!= 0) {
2472 assert(pii
->pii_target_next
!= NULL
);
2473 assert(pii
->pii_rtt_target_next
!= NULL
);
2474 assert(pii
->pii_target_next
->tg_status
== TG_ACTIVE
);
2475 assert(pii
->pii_rtt_target_next
->tg_status
==
2482 * If we still don't have any active targets, the list must
2483 * must be really empty. There aren't even TG_SLOW or TG_DEAD
2484 * targets. Zero out the probe stats since it will not be
2485 * relevant any longer.
2487 assert(pii
->pii_targets
== NULL
);
2488 pii
->pii_targets_are_routers
= _B_FALSE
;
2489 clear_pii_probe_stats(pii
);
2490 pii_other
= phyint_inst_other(pii
);
2493 * If there are no targets on both instances and the interface would
2494 * otherwise be considered PI_RUNNING, go back to PI_NOTARGETS state,
2495 * since we cannot probe this phyint any more. For more details,
2496 * please see phyint state diagram in mpd_probe.c.
2498 if (!PROBE_CAPABLE(pii_other
) && LINK_UP(pii
->pii_phyint
) &&
2499 pii
->pii_phyint
->pi_state
!= PI_OFFLINE
)
2500 phyint_chstate(pii
->pii_phyint
, PI_NOTARGETS
);
2504 * Flush the target list of every phyint in the group, if the list
2505 * is a host target list. This is called if group failure is suspected.
2506 * If all targets have failed, multicast will subsequently discover new
2507 * targets. Else it is a group failure.
2508 * Note: This function is a no-op if the list is a router target list.
2511 target_flush_hosts(struct phyint_group
*pg
)
2514 struct phyint_instance
*pii
;
2516 if (debug
& D_TARGET
)
2517 logdebug("target_flush_hosts(%s)\n", pg
->pg_name
);
2519 for (pi
= pg
->pg_phyint
; pi
!= NULL
; pi
= pi
->pi_pgnext
) {
2521 if (pii
!= NULL
&& !pii
->pii_targets_are_routers
) {
2523 * Delete all the targets. When the list becomes
2524 * empty, target_delete() will set pii->pii_targets
2527 while (pii
->pii_targets
!= NULL
)
2528 target_delete(pii
->pii_targets
);
2531 if (pii
!= NULL
&& !pii
->pii_targets_are_routers
) {
2533 * Delete all the targets. When the list becomes
2534 * empty, target_delete() will set pii->pii_targets
2537 while (pii
->pii_targets
!= NULL
)
2538 target_delete(pii
->pii_targets
);
2544 * Reset all references to 'target' in the probe info, as this target is
2545 * being deleted. The pr_target field is guaranteed to be non-null if
2546 * pr_status is PR_UNACKED. So we change the pr_status to PR_LOST, so that
2547 * pr_target will not be accessed unconditionally.
2550 reset_pii_probes(struct phyint_instance
*pii
, struct target
*tg
)
2554 for (i
= 0; i
< PROBE_STATS_COUNT
; i
++) {
2555 if (pii
->pii_probes
[i
].pr_target
== tg
) {
2556 if (pii
->pii_probes
[i
].pr_status
== PR_UNACKED
) {
2557 probe_chstate(&pii
->pii_probes
[i
], pii
,
2560 pii
->pii_probes
[i
].pr_target
= NULL
;
2566 * Clear the probe statistics array.
2569 clear_pii_probe_stats(struct phyint_instance
*pii
)
2571 bzero(pii
->pii_probes
, sizeof (struct probe_stats
) * PROBE_STATS_COUNT
);
2572 /* Reset the next probe index in the probe stats array */
2573 pii
->pii_probe_next
= 0;
2577 target_print(struct target
*tg
)
2579 char abuf
[INET6_ADDRSTRLEN
];
2585 af
= tg
->tg_phyint_inst
->pii_af
;
2587 logdebug("Target on %s %s addr %s\n"
2588 "status %d rtt_sa %lld rtt_sd %lld crtt %d tg_in_use %d\n",
2589 AF_STR(af
), tg
->tg_phyint_inst
->pii_name
,
2590 pr_addr(af
, tg
->tg_address
, abuf
, sizeof (abuf
)),
2591 tg
->tg_status
, tg
->tg_rtt_sa
, tg
->tg_rtt_sd
,
2592 tg
->tg_crtt
, tg
->tg_in_use
);
2595 for (i
= 0; i
< tg
->tg_num_deferred
; i
++) {
2596 (void) snprintf(buf2
, sizeof (buf2
), " %dms",
2597 tg
->tg_deferred
[i
]);
2598 (void) strlcat(buf
, buf2
, sizeof (buf
));
2600 logdebug("deferred rtts:%s\n", buf
);
2604 phyint_inst_print_all(void)
2606 struct phyint_instance
*pii
;
2608 for (pii
= phyint_instances
; pii
!= NULL
; pii
= pii
->pii_next
) {
2609 phyint_inst_print(pii
);
2614 * Compare two prefixes that have the same prefix length.
2615 * Fails if the prefix length is unreasonable.
2618 prefix_equal(struct in6_addr p1
, struct in6_addr p2
, uint_t prefix_len
)
2623 if (prefix_len
> IPV6_ABITS
)
2626 for (j
= 0; prefix_len
> 8; prefix_len
-= 8, j
++)
2627 if (p1
.s6_addr
[j
] != p2
.s6_addr
[j
])
2630 /* Make the N leftmost bits one */
2631 mask
= 0xff << (8 - prefix_len
);
2632 if ((p1
.s6_addr
[j
] & mask
) != (p2
.s6_addr
[j
] & mask
))
2639 * Get the number of UP logints on phyint `pi'.
2642 logint_upcount(struct phyint
*pi
)
2647 if (pi
->pi_v4
!= NULL
) {
2648 for (li
= pi
->pi_v4
->pii_logint
; li
!= NULL
; li
= li
->li_next
) {
2649 if (li
->li_flags
& IFF_UP
)
2654 if (pi
->pi_v6
!= NULL
) {
2655 for (li
= pi
->pi_v6
->pii_logint
; li
!= NULL
; li
= li
->li_next
) {
2656 if (li
->li_flags
& IFF_UP
)
2665 * Get the phyint instance with the other (IPv4 / IPv6) protocol
2667 struct phyint_instance
*
2668 phyint_inst_other(struct phyint_instance
*pii
)
2670 if (pii
->pii_af
== AF_INET
)
2671 return (pii
->pii_phyint
->pi_v6
);
2673 return (pii
->pii_phyint
->pi_v4
);
2677 * Check whether a phyint is functioning.
2680 phyint_is_functioning(struct phyint
*pi
)
2682 if (pi
->pi_state
== PI_RUNNING
)
2684 return (pi
->pi_state
== PI_NOTARGETS
&& !(pi
->pi_flags
& IFF_FAILED
));
2688 * Check whether a phyint is usable.
2691 phyint_is_usable(struct phyint
*pi
)
2693 if (logint_upcount(pi
) == 0)
2695 return (phyint_is_functioning(pi
));
2699 * Post an EC_IPMP sysevent of subclass `subclass' and attributes `nvl'.
2700 * Before sending the event, it prepends the current version of the IPMP
2701 * sysevent API. Returns 0 on success, -1 on failure (in either case,
2705 post_event(const char *subclass
, nvlist_t
*nvl
)
2707 static evchan_t
*evchp
= NULL
;
2710 * Initialize the event channel if we haven't already done so.
2712 if (evchp
== NULL
) {
2713 errno
= sysevent_evc_bind(IPMP_EVENT_CHAN
, &evchp
, EVCH_CREAT
);
2715 logerr("cannot create event channel `%s': %s\n",
2716 IPMP_EVENT_CHAN
, strerror(errno
));
2721 errno
= nvlist_add_uint32(nvl
, IPMP_EVENT_VERSION
,
2722 IPMP_EVENT_CUR_VERSION
);
2724 logerr("cannot create `%s' event: %s", subclass
,
2729 errno
= sysevent_evc_publish(evchp
, EC_IPMP
, subclass
, "com.sun",
2730 "in.mpathd", nvl
, EVCH_NOSLEEP
);
2732 logerr("cannot send `%s' event: %s\n", subclass
,
2745 * Return the external IPMP state associated with phyint `pi'.
2747 static ipmp_if_state_t
2748 ifstate(struct phyint
*pi
)
2750 switch (pi
->pi_state
) {
2752 return (IPMP_IF_UNKNOWN
);
2755 if (pi
->pi_flags
& IFF_FAILED
)
2756 return (IPMP_IF_FAILED
);
2757 return (IPMP_IF_UNKNOWN
);
2760 return (IPMP_IF_OFFLINE
);
2763 return (IPMP_IF_FAILED
);
2766 return (IPMP_IF_OK
);
2769 logerr("ifstate: unknown state %d; aborting\n", pi
->pi_state
);
2775 * Return the external IPMP interface type associated with phyint `pi'.
2777 static ipmp_if_type_t
2778 iftype(struct phyint
*pi
)
2780 if (pi
->pi_flags
& IFF_STANDBY
)
2781 return (IPMP_IF_STANDBY
);
2783 return (IPMP_IF_NORMAL
);
2787 * Return the external IPMP link state associated with phyint `pi'.
2789 static ipmp_if_linkstate_t
2790 iflinkstate(struct phyint
*pi
)
2792 if (!(pi
->pi_notes
& (DL_NOTE_LINK_UP
|DL_NOTE_LINK_DOWN
)))
2793 return (IPMP_LINK_UNKNOWN
);
2795 return (LINK_DOWN(pi
) ? IPMP_LINK_DOWN
: IPMP_LINK_UP
);
2799 * Return the external IPMP probe state associated with phyint `pi'.
2801 static ipmp_if_probestate_t
2802 ifprobestate(struct phyint
*pi
)
2804 if (!PROBE_ENABLED(pi
->pi_v4
) && !PROBE_ENABLED(pi
->pi_v6
))
2805 return (IPMP_PROBE_DISABLED
);
2807 if (pi
->pi_state
== PI_FAILED
)
2808 return (IPMP_PROBE_FAILED
);
2810 if (!PROBE_CAPABLE(pi
->pi_v4
) && !PROBE_CAPABLE(pi
->pi_v6
))
2811 return (IPMP_PROBE_UNKNOWN
);
2813 return (IPMP_PROBE_OK
);
2817 * Return the external IPMP target mode associated with phyint instance `pii'.
2819 static ipmp_if_targmode_t
2820 iftargmode(struct phyint_instance
*pii
)
2822 if (!PROBE_ENABLED(pii
))
2823 return (IPMP_TARG_DISABLED
);
2824 else if (pii
->pii_targets_are_routers
)
2825 return (IPMP_TARG_ROUTES
);
2827 return (IPMP_TARG_MULTICAST
);
2831 * Return the external IPMP flags associated with phyint `pi'.
2833 static ipmp_if_flags_t
2834 ifflags(struct phyint
*pi
)
2836 ipmp_if_flags_t flags
= 0;
2838 if (logint_upcount(pi
) == 0)
2839 flags
|= IPMP_IFFLAG_DOWN
;
2840 if (pi
->pi_flags
& IFF_INACTIVE
)
2841 flags
|= IPMP_IFFLAG_INACTIVE
;
2842 if (pi
->pi_hwaddrdup
)
2843 flags
|= IPMP_IFFLAG_HWADDRDUP
;
2844 if (phyint_is_functioning(pi
) && flags
== 0)
2845 flags
|= IPMP_IFFLAG_ACTIVE
;
2851 * Store the test address used on phyint instance `pii' in `ssp'. If there's
2852 * no test address, 0.0.0.0 is stored.
2854 static struct sockaddr_storage
*
2855 iftestaddr(struct phyint_instance
*pii
, struct sockaddr_storage
*ssp
)
2857 if (PROBE_ENABLED(pii
))
2858 addr2storage(pii
->pii_af
, &pii
->pii_probe_logint
->li_addr
, ssp
);
2860 addr2storage(AF_INET6
, &in6addr_any
, ssp
);
2866 * Return the external IPMP group state associated with phyint group `pg'.
2868 static ipmp_group_state_t
2869 groupstate(struct phyint_group
*pg
)
2871 switch (pg
->pg_state
) {
2873 return (IPMP_GROUP_FAILED
);
2875 return (IPMP_GROUP_DEGRADED
);
2877 return (IPMP_GROUP_OK
);
2880 logerr("groupstate: unknown state %d; aborting\n", pg
->pg_state
);
2886 * Return the external IPMP probe state associated with probe `ps'.
2888 static ipmp_probe_state_t
2889 probestate(struct probe_stats
*ps
)
2891 switch (ps
->pr_status
) {
2894 return (IPMP_PROBE_LOST
);
2896 return (IPMP_PROBE_SENT
);
2898 return (IPMP_PROBE_ACKED
);
2901 logerr("probestate: unknown state %d; aborting\n", ps
->pr_status
);
2907 * Generate an ESC_IPMP_PROBE_STATE sysevent for the probe described by `pr'
2908 * on phyint instance `pii'. Returns 0 on success, -1 on failure.
2911 probe_state_event(struct probe_stats
*pr
, struct phyint_instance
*pii
)
2914 hrtime_t proc_time
= 0, recv_time
= 0;
2915 struct sockaddr_storage ss
;
2916 struct target
*tg
= pr
->pr_target
;
2917 int64_t rttavg
, rttdev
;
2919 errno
= nvlist_alloc(&nvl
, NV_UNIQUE_NAME
, 0);
2921 logperror("cannot create `interface change' event");
2925 errno
= nvlist_add_uint32(nvl
, IPMP_PROBE_ID
, pr
->pr_id
);
2929 errno
= nvlist_add_string(nvl
, IPMP_IF_NAME
, pii
->pii_phyint
->pi_name
);
2933 errno
= nvlist_add_uint32(nvl
, IPMP_PROBE_STATE
, probestate(pr
));
2937 errno
= nvlist_add_hrtime(nvl
, IPMP_PROBE_START_TIME
,
2938 pr
->pr_hrtime_start
);
2942 errno
= nvlist_add_hrtime(nvl
, IPMP_PROBE_SENT_TIME
,
2943 pr
->pr_hrtime_sent
);
2947 if (pr
->pr_status
== PR_ACKED
) {
2948 recv_time
= pr
->pr_hrtime_ackrecv
;
2949 proc_time
= pr
->pr_hrtime_ackproc
;
2952 errno
= nvlist_add_hrtime(nvl
, IPMP_PROBE_ACKRECV_TIME
, recv_time
);
2956 errno
= nvlist_add_hrtime(nvl
, IPMP_PROBE_ACKPROC_TIME
, proc_time
);
2961 addr2storage(pii
->pii_af
, &tg
->tg_address
, &ss
);
2963 addr2storage(pii
->pii_af
, &in6addr_any
, &ss
);
2965 errno
= nvlist_add_byte_array(nvl
, IPMP_PROBE_TARGET
, (uchar_t
*)&ss
,
2970 rttavg
= (tg
!= NULL
) ? (tg
->tg_rtt_sa
/ 8) : 0;
2971 errno
= nvlist_add_int64(nvl
, IPMP_PROBE_TARGET_RTTAVG
, rttavg
);
2975 rttdev
= (tg
!= NULL
) ? (tg
->tg_rtt_sd
/ 4) : 0;
2976 errno
= nvlist_add_int64(nvl
, IPMP_PROBE_TARGET_RTTDEV
, rttdev
);
2980 return (post_event(ESC_IPMP_PROBE_STATE
, nvl
));
2982 logperror("cannot create `probe state' event");
2988 * Generate an ESC_IPMP_GROUP_STATE sysevent for phyint group `pg'.
2989 * Returns 0 on success, -1 on failure.
2992 phyint_group_state_event(struct phyint_group
*pg
)
2996 errno
= nvlist_alloc(&nvl
, NV_UNIQUE_NAME
, 0);
2998 logperror("cannot create `group state change' event");
3002 errno
= nvlist_add_string(nvl
, IPMP_GROUP_NAME
, pg
->pg_name
);
3006 errno
= nvlist_add_uint64(nvl
, IPMP_GROUP_SIGNATURE
, pg
->pg_sig
);
3010 errno
= nvlist_add_uint32(nvl
, IPMP_GROUP_STATE
, groupstate(pg
));
3014 return (post_event(ESC_IPMP_GROUP_STATE
, nvl
));
3016 logperror("cannot create `group state change' event");
3022 * Generate an ESC_IPMP_GROUP_CHANGE sysevent of type `op' for phyint group
3023 * `pg'. Returns 0 on success, -1 on failure.
3026 phyint_group_change_event(struct phyint_group
*pg
, ipmp_group_op_t op
)
3030 errno
= nvlist_alloc(&nvl
, NV_UNIQUE_NAME
, 0);
3032 logperror("cannot create `group change' event");
3036 errno
= nvlist_add_string(nvl
, IPMP_GROUP_NAME
, pg
->pg_name
);
3040 errno
= nvlist_add_uint64(nvl
, IPMP_GROUP_SIGNATURE
, pg
->pg_sig
);
3044 errno
= nvlist_add_uint64(nvl
, IPMP_GROUPLIST_SIGNATURE
,
3045 phyint_grouplistsig
);
3049 errno
= nvlist_add_uint32(nvl
, IPMP_GROUP_OPERATION
, op
);
3053 return (post_event(ESC_IPMP_GROUP_CHANGE
, nvl
));
3055 logperror("cannot create `group change' event");
3061 * Generate an ESC_IPMP_GROUP_MEMBER_CHANGE sysevent for phyint `pi' in
3062 * group `pg'. Returns 0 on success, -1 on failure.
3065 phyint_group_member_event(struct phyint_group
*pg
, struct phyint
*pi
,
3070 errno
= nvlist_alloc(&nvl
, NV_UNIQUE_NAME
, 0);
3072 logperror("cannot create `group member change' event");
3076 errno
= nvlist_add_string(nvl
, IPMP_GROUP_NAME
, pg
->pg_name
);
3080 errno
= nvlist_add_uint64(nvl
, IPMP_GROUP_SIGNATURE
, pg
->pg_sig
);
3084 errno
= nvlist_add_uint32(nvl
, IPMP_IF_OPERATION
, op
);
3088 errno
= nvlist_add_string(nvl
, IPMP_IF_NAME
, pi
->pi_name
);
3092 errno
= nvlist_add_uint32(nvl
, IPMP_IF_TYPE
, iftype(pi
));
3096 errno
= nvlist_add_uint32(nvl
, IPMP_IF_STATE
, ifstate(pi
));
3100 return (post_event(ESC_IPMP_GROUP_MEMBER_CHANGE
, nvl
));
3102 logperror("cannot create `group member change' event");
3109 * Generate an ESC_IPMP_IF_CHANGE sysevent for phyint `pi' in group `pg'.
3110 * Returns 0 on success, -1 on failure.
3113 phyint_state_event(struct phyint_group
*pg
, struct phyint
*pi
)
3117 errno
= nvlist_alloc(&nvl
, NV_UNIQUE_NAME
, 0);
3119 logperror("cannot create `interface change' event");
3123 errno
= nvlist_add_string(nvl
, IPMP_GROUP_NAME
, pg
->pg_name
);
3127 errno
= nvlist_add_uint64(nvl
, IPMP_GROUP_SIGNATURE
, pg
->pg_sig
);
3131 errno
= nvlist_add_string(nvl
, IPMP_IF_NAME
, pi
->pi_name
);
3135 errno
= nvlist_add_uint32(nvl
, IPMP_IF_TYPE
, iftype(pi
));
3139 errno
= nvlist_add_uint32(nvl
, IPMP_IF_STATE
, ifstate(pi
));
3143 return (post_event(ESC_IPMP_IF_CHANGE
, nvl
));
3145 logperror("cannot create `interface change' event");
/*
 * Generate a signature for use.  A signature is conceptually split into a
 * random 16-bit "generation number" (the top 16 bits) and a 48-bit
 * monotonically increasing integer (the low 48 bits), which starts at 1
 * here.  The generation number protects against stale updates to entities
 * (e.g., IPMP groups) that have been deleted and since recreated.
 *
 * The random number generator is seeded from gethrtime() on the first call.
 */
static uint64_t
gensig(void)
{
	static int seeded = 0;
	uint64_t generation;

	if (!seeded) {
		srand48((long)gethrtime());
		seeded = 1;
	}

	generation = (uint64_t)lrand48();
	return (generation << 48 | 1);
}
3172 * Store the information associated with group `grname' into a dynamically
3173 * allocated structure pointed to by `*grinfopp'. Returns an IPMP error code.
3176 getgroupinfo(const char *grname
, ipmp_groupinfo_t
**grinfopp
)
3179 struct phyint_group
*pg
;
3180 char (*ifs
)[LIFNAMSIZ
];
3182 unsigned int nif
= 0, naddr
= 0;
3183 lifgroupinfo_t lifgr
;
3185 struct sockaddr_storage
*addrs
;
3188 pg
= phyint_group_lookup(grname
);
3190 return (IPMP_EUNKGROUP
);
3193 * Tally up the number of interfaces, allocate an array to hold them,
3194 * and insert their names into the array. While we're at it, if any
3195 * interface is actually enabled to send probes, save the group fdt.
3197 for (pi
= pg
->pg_phyint
; pi
!= NULL
; pi
= pi
->pi_pgnext
)
3200 ifs
= alloca(nif
* sizeof (*ifs
));
3201 for (i
= 0, pi
= pg
->pg_phyint
; pi
!= NULL
; pi
= pi
->pi_pgnext
, i
++) {
3203 (void) strlcpy(ifs
[i
], pi
->pi_name
, LIFNAMSIZ
);
3204 if (PROBE_ENABLED(pi
->pi_v4
) || PROBE_ENABLED(pi
->pi_v6
))
3210 * If this is the anonymous group, there's no other information to
3211 * collect (since there's no IPMP interface).
3213 if (pg
== phyint_anongroup
) {
3214 *grinfopp
= ipmp_groupinfo_create(pg
->pg_name
, pg
->pg_sig
, fdt
,
3215 groupstate(pg
), nif
, ifs
, "", "", "", "", 0, NULL
);
3216 return (*grinfopp
== NULL
? IPMP_ENOMEM
: IPMP_SUCCESS
);
3220 * Grab some additional information about the group from the kernel.
3221 * (NOTE: since SIOCGLIFGROUPINFO does not look up by interface name,
3222 * we can use ifsock_v4 even for a V6-only group.)
3224 (void) strlcpy(lifgr
.gi_grname
, grname
, LIFGRNAMSIZ
);
3225 if (ioctl(ifsock_v4
, SIOCGLIFGROUPINFO
, &lifgr
) == -1) {
3226 if (errno
== ENOENT
)
3227 return (IPMP_EUNKGROUP
);
3229 logperror("getgroupinfo: SIOCGLIFGROUPINFO");
3230 return (IPMP_FAILURE
);
3234 * Tally up the number of data addresses, allocate an array to hold
3235 * them, and insert their values into the array.
3237 for (addrp
= pg
->pg_addrs
; addrp
!= NULL
; addrp
= addrp
->al_next
)
3240 addrs
= alloca(naddr
* sizeof (*addrs
));
3242 for (addrp
= pg
->pg_addrs
; addrp
!= NULL
; addrp
= addrp
->al_next
) {
3244 * It's possible to have duplicate addresses (if some are
3245 * down). Weed the dups out to avoid confusing consumers.
3246 * (If groups start having tons of addresses, we'll need a
3247 * better algorithm here.)
3249 for (j
= 0; j
< i
; j
++) {
3250 if (sockaddrcmp(&addrs
[j
], &addrp
->al_addr
))
3255 addrs
[i
++] = addrp
->al_addr
;
3260 *grinfopp
= ipmp_groupinfo_create(pg
->pg_name
, pg
->pg_sig
, fdt
,
3261 groupstate(pg
), nif
, ifs
, lifgr
.gi_grifname
, lifgr
.gi_m4ifname
,
3262 lifgr
.gi_m6ifname
, lifgr
.gi_bcifname
, naddr
, addrs
);
3263 return (*grinfopp
== NULL
? IPMP_ENOMEM
: IPMP_SUCCESS
);
3267 * Store the target information associated with phyint instance `pii' into a
3268 * dynamically allocated structure pointed to by `*targinfopp'. Returns an
3272 gettarginfo(struct phyint_instance
*pii
, const char *name
,
3273 ipmp_targinfo_t
**targinfopp
)
3277 struct sockaddr_storage ss
;
3278 struct sockaddr_storage
*targs
= NULL
;
3280 if (PROBE_CAPABLE(pii
)) {
3281 targs
= alloca(pii
->pii_ntargets
* sizeof (*targs
));
3282 tg
= pii
->pii_target_next
;
3284 if (tg
->tg_status
== TG_ACTIVE
) {
3285 assert(ntarg
< pii
->pii_ntargets
);
3286 addr2storage(pii
->pii_af
, &tg
->tg_address
,
3289 if ((tg
= tg
->tg_next
) == NULL
)
3290 tg
= pii
->pii_targets
;
3291 } while (tg
!= pii
->pii_target_next
);
3293 assert(ntarg
== pii
->pii_ntargets
);
3296 *targinfopp
= ipmp_targinfo_create(name
, iftestaddr(pii
, &ss
),
3297 iftargmode(pii
), ntarg
, targs
);
3298 return (*targinfopp
== NULL
? IPMP_ENOMEM
: IPMP_SUCCESS
);
3302 * Store the information associated with interface `ifname' into a dynamically
3303 * allocated structure pointed to by `*ifinfopp'. Returns an IPMP error code.
3306 getifinfo(const char *ifname
, ipmp_ifinfo_t
**ifinfopp
)
3310 ipmp_targinfo_t
*targinfo4
;
3311 ipmp_targinfo_t
*targinfo6
;
3313 pi
= phyint_lookup(ifname
);
3315 return (IPMP_EUNKIF
);
3317 if ((retval
= gettarginfo(pi
->pi_v4
, pi
->pi_name
, &targinfo4
)) != 0 ||
3318 (retval
= gettarginfo(pi
->pi_v6
, pi
->pi_name
, &targinfo6
)) != 0)
3321 *ifinfopp
= ipmp_ifinfo_create(pi
->pi_name
, pi
->pi_group
->pg_name
,
3322 ifstate(pi
), iftype(pi
), iflinkstate(pi
), ifprobestate(pi
),
3323 ifflags(pi
), targinfo4
, targinfo6
);
3324 retval
= (*ifinfopp
== NULL
? IPMP_ENOMEM
: IPMP_SUCCESS
);
3326 if (targinfo4
!= NULL
)
3327 ipmp_freetarginfo(targinfo4
);
3328 if (targinfo6
!= NULL
)
3329 ipmp_freetarginfo(targinfo6
);
3334 * Store the current list of IPMP groups into a dynamically allocated
3335 * structure pointed to by `*grlistpp'. Returns an IPMP error code.
3338 getgrouplist(ipmp_grouplist_t
**grlistpp
)
3340 struct phyint_group
*pg
;
3341 char (*groups
)[LIFGRNAMSIZ
];
3342 unsigned int i
, ngroup
;
3345 * Tally up the number of groups, allocate an array to hold them, and
3346 * insert their names into the array.
3348 for (ngroup
= 0, pg
= phyint_groups
; pg
!= NULL
; pg
= pg
->pg_next
)
3351 groups
= alloca(ngroup
* sizeof (*groups
));
3352 for (i
= 0, pg
= phyint_groups
; pg
!= NULL
; pg
= pg
->pg_next
, i
++) {
3354 (void) strlcpy(groups
[i
], pg
->pg_name
, LIFGRNAMSIZ
);
3356 assert(i
== ngroup
);
3358 *grlistpp
= ipmp_grouplist_create(phyint_grouplistsig
, ngroup
, groups
);
3359 return (*grlistpp
== NULL
? IPMP_ENOMEM
: IPMP_SUCCESS
);
3363 * Store the address information for `ssp' (in group `grname') into a
3364 * dynamically allocated structure pointed to by `*adinfopp'. Returns an IPMP
3365 * error code. (We'd call this function getaddrinfo(), but it would conflict
3366 * with getaddrinfo(3SOCKET)).
3369 getgraddrinfo(const char *grname
, struct sockaddr_storage
*ssp
,
3370 ipmp_addrinfo_t
**adinfopp
)
3373 addrlist_t
*addrp
, *addrmatchp
= NULL
;
3374 ipmp_addr_state_t state
;
3375 const char *binding
= "";
3377 struct phyint_group
*pg
;
3379 if ((pg
= phyint_group_lookup(grname
)) == NULL
)
3380 return (IPMP_EUNKADDR
);
3383 * Walk through the data addresses, and find a match. Note that since
3384 * some of the addresses may be down, more than one may match. We
3385 * prefer an up address (if one exists).
3387 for (addrp
= pg
->pg_addrs
; addrp
!= NULL
; addrp
= addrp
->al_next
) {
3388 if (sockaddrcmp(ssp
, &addrp
->al_addr
)) {
3390 if (addrmatchp
->al_flags
& IFF_UP
)
3395 if (addrmatchp
== NULL
)
3396 return (IPMP_EUNKADDR
);
3398 state
= (addrmatchp
->al_flags
& IFF_UP
) ? IPMP_ADDR_UP
: IPMP_ADDR_DOWN
;
3399 if (state
== IPMP_ADDR_UP
) {
3400 ifsock
= (ssp
->ss_family
== AF_INET
) ? ifsock_v4
: ifsock_v6
;
3401 (void) strlcpy(lifr
.lifr_name
, addrmatchp
->al_name
, LIFNAMSIZ
);
3402 if (ioctl(ifsock
, SIOCGLIFBINDING
, &lifr
) >= 0)
3403 binding
= lifr
.lifr_binding
;
3406 *adinfopp
= ipmp_addrinfo_create(ssp
, state
, pg
->pg_name
, binding
);
3407 return (*adinfopp
== NULL
? IPMP_ENOMEM
: IPMP_SUCCESS
);
3411 * Store a snapshot of the IPMP subsystem into a dynamically allocated
3412 * structure pointed to by `*snapp'. Returns an IPMP error code.
3415 getsnap(ipmp_snap_t
**snapp
)
3417 ipmp_grouplist_t
*grlistp
;
3418 ipmp_groupinfo_t
*grinfop
;
3419 ipmp_addrinfo_t
*adinfop
;
3420 ipmp_addrlist_t
*adlistp
;
3421 ipmp_ifinfo_t
*ifinfop
;
3427 snap
= ipmp_snap_create();
3429 return (IPMP_ENOMEM
);
3434 retval
= getgrouplist(&snap
->sn_grlistp
);
3435 if (retval
!= IPMP_SUCCESS
)
3439 * Add information for each group in the list, along with all of its
3442 grlistp
= snap
->sn_grlistp
;
3443 for (i
= 0; i
< grlistp
->gl_ngroup
; i
++) {
3444 retval
= getgroupinfo(grlistp
->gl_groups
[i
], &grinfop
);
3445 if (retval
!= IPMP_SUCCESS
)
3448 retval
= ipmp_snap_addgroupinfo(snap
, grinfop
);
3449 if (retval
!= IPMP_SUCCESS
) {
3450 ipmp_freegroupinfo(grinfop
);
3454 adlistp
= grinfop
->gr_adlistp
;
3455 for (j
= 0; j
< adlistp
->al_naddr
; j
++) {
3456 retval
= getgraddrinfo(grinfop
->gr_name
,
3457 &adlistp
->al_addrs
[j
], &adinfop
);
3458 if (retval
!= IPMP_SUCCESS
)
3461 retval
= ipmp_snap_addaddrinfo(snap
, adinfop
);
3462 if (retval
!= IPMP_SUCCESS
) {
3463 ipmp_freeaddrinfo(adinfop
);
3470 * Add information for each configured phyint.
3472 for (pi
= phyints
; pi
!= NULL
; pi
= pi
->pi_next
) {
3473 retval
= getifinfo(pi
->pi_name
, &ifinfop
);
3474 if (retval
!= IPMP_SUCCESS
)
3477 retval
= ipmp_snap_addifinfo(snap
, ifinfop
);
3478 if (retval
!= IPMP_SUCCESS
) {
3479 ipmp_freeifinfo(ifinfop
);
3485 return (IPMP_SUCCESS
);
3487 ipmp_snap_free(snap
);