4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
/*
 * phyint: A NIC, e.g. hme0. This is represented as 'struct phyint'.
 *
 * phyint instance: A protocol instance of a phyint, e.g. the IPv4 instance
 * of hme0 or the IPv6 instance of hme0. (struct phyint_instance)
 *
 * logint: A logical interface, e.g. hme0:1. (struct logint)
 *
 * phyint_group: A group of phyints, i.e. physical interfaces that
 *	(i) are connected to the same level 2 topology, e.g. the same
 *	    ethernet segment, and
 *	(ii) share the same phyint group name.
 * Load spreading and failover occur across members of the same phyint group.
 * phyint group members must be homogeneous, i.e. if a phyint belonging to a
 * phyint group has an IPv6 protocol instance, then all members of the phyint
 * group must have IPv6 protocol instances. (struct phyint_group)
 */
/* Maximum number of RTT data points that may be deferred. */
#define	MAXDEFERREDRTT	1
/*
 * Status of the phyint, expressed by the return code of failure_state().
 */
#define	PHYINT_OK	0	/* No failure detected */
#define	PHYINT_FAILURE	1	/* NIC failure detected */
#define	GROUP_FAILURE	2	/* All NICs have failed */
/*
 * Return values of phyint_inst_update_from_k().
 */
#define	PI_OK			1	/* Phyint matches in the kernel */
#define	PI_DELETED		2	/* Phyint has vanished in the kernel */
#define	PI_IFINDEX_CHANGED	3	/* Phyint's ifindex has changed */
#define	PI_IOCTL_ERROR		4	/* Some ioctl error */
#define	PI_GROUP_CHANGED	5	/* The phyint has changed group. */
/*
 * Reduce a flags word to the IFF_STANDBY, IFF_INACTIVE, IFF_FAILED,
 * IFF_OFFLINE and IFF_RUNNING bits.  IFF_RUNNING is additionally forced on
 * whenever handle_link_notifications is zero.
 */
#define	PHYINT_FLAGS(flags)						\
	((handle_link_notifications ? 0 : IFF_RUNNING) |		\
	((flags) & (IFF_RUNNING | IFF_OFFLINE | IFF_FAILED |		\
	IFF_INACTIVE | IFF_STANDBY)))
/*
 * A phyint has at most two instances; select the IPv4 one when 'af' is
 * AF_INET and the IPv6 one for any other address family.
 */
#define	PHYINT_INSTANCE(pi, af)	\
	((af) != AF_INET ? (pi)->pi_v6 : (pi)->pi_v4)
/*
 * A phyint instance is probe *enabled* if it has been configured with a
 * unique probe address (i.e., an IFF_NOFAILOVER address).  It is probe
 * *capable* if it is also able to send probes (i.e., has one or more
 * targets).
 *
 * PROBE_ENABLED() tolerates a NULL 'pii' and evaluates to 0 for it.
 */
#define	PROBE_ENABLED(pii)						\
	(((pii) != NULL) && ((pii)->pii_probe_sock != -1) &&		\
	((pii)->pii_probe_logint != NULL) &&				\
	(((pii)->pii_probe_logint->li_dupaddr == 0)))

#define	PROBE_CAPABLE(pii)						\
	(PROBE_ENABLED(pii) && ((pii)->pii_ntargets != 0))
/*
 * Modular subtraction: (a - b) mod n.  'n' is added before subtracting so
 * the intermediate value does not go negative when b <= a + n.
 */
#define	MOD_SUB(a, b, n)	\
	(((n) + (a) - (b)) % (n))
/* Increment modulo n: wraps from n - 1 back to 0. */
#define	MOD_INCR(a, n)	\
	(((a) + 1) % (n))

/* Decrement modulo n: wraps from 0 back to n - 1. */
#define	MOD_DCR(a, n)	\
	MOD_SUB(a, 1, n)
/*
 * 'index' represents an index into the circular probe stats array of
 * size PROBE_STATS_COUNT.  0 <= index < PROBE_STATS_COUNT.  This is used
 * to access members of the pii_probes[] array defined in the phyint_instance
 * structure.
 */
#define	PROBE_INDEX_PREV(index)	\
	MOD_DCR(index, PROBE_STATS_COUNT)

#define	PROBE_INDEX_NEXT(index)	\
	MOD_INCR(index, PROBE_STATS_COUNT)
/*
 * If we receive more than LINK_UP_PERMIN "link up" notifications in a minute,
 * then don't actually perform the repair operation until we've dropped back
 * below the threshold (or we have a probe address and our probes indicate
 * that the link is functioning again).  This is to prevent link flapping in
 * the case where we don't have a probe address.
 */
#define	LINK_UP_PERMIN	2
/*
 * Link-state helpers.  pi_link_state holds 1 when the link is considered up
 * and 0 when it is down; UPDATE_LINK_STATE() derives it from the IFF_RUNNING
 * bit of pi_flags, and INIT_LINK_STATE() starts it out as up.
 */
#define	LINK_DOWN(pi)		(0 == (pi)->pi_link_state)
#define	LINK_UP(pi)		(!LINK_DOWN(pi))
#define	FLAGS_TO_LINK_STATE(pi)	(0 != ((pi)->pi_flags & IFF_RUNNING))
#define	UPDATE_LINK_STATE(pi)						\
	((pi)->pi_link_state = (FLAGS_TO_LINK_STATE(pi) ? 1 : 0))
#define	INIT_LINK_STATE(pi)	((pi)->pi_link_state = 1)
/*
 * Phyint group states; see below for the phyint group definition.
 */
enum pg_state {
	PG_OK = 1,	/* all interfaces in the group are working */
	PG_DEGRADED,	/* some interfaces in the group are unusable */
	PG_FAILED	/* all interfaces in the group are unusable */
};
/*
 * Convenience macro to check if the whole group has failed, i.e. whether
 * the group's pg_state is PG_FAILED.
 */
#define	GROUP_FAILED(pg)	((pg)->pg_state == PG_FAILED)
149 * A doubly linked list of all phyint groups in the system.
150 * A phyint group is identified by its group name.
152 struct phyint_group
{
153 char pg_name
[LIFGRNAMSIZ
]; /* Phyint group name */
154 struct phyint
*pg_phyint
; /* List of phyints in this group */
155 struct phyint_group
*pg_next
; /* Next phyint group */
156 struct phyint_group
*pg_prev
; /* Prev phyint group */
157 uint64_t pg_sig
; /* Current signature of this group */
158 int pg_probeint
; /* Interval between probes */
159 int pg_fdt
; /* Time needed to detect failure */
160 enum pg_state pg_state
; /* Current group state */
161 boolean_t pg_in_use
; /* To detect removed groups */
162 struct addrlist
*pg_addrs
; /* Data addresses in this group */
163 boolean_t pg_failmsg_printed
; /* Group failure msg printed */
/*
 * Phyint states; see below for the phyint definition.
 */
enum pi_state {
	PI_INIT		= 0,	/* Phyint is being initialized */
	PI_NOTARGETS	= 1,	/* Phyint has no targets */
	PI_RUNNING	= 2,	/* Phyint is functioning */
	PI_FAILED	= 3,	/* Phyint is failed */
	PI_OFFLINE	= 4	/* Phyint is offline */
};
178 * Representation of a NIC or a phyint. There is a list of all known phyints.
179 * There is also a list of phyints belonging to a phyint group, one list
183 char pi_name
[LIFNAMSIZ
+ 1]; /* Phyint name eg. le0 */
184 struct phyint_instance
*pi_v4
; /* The IPv4 instance */
185 struct phyint_instance
*pi_v6
; /* The IPv6 instance */
186 struct phyint_group
*pi_group
; /* Pointer to the group */
187 struct phyint
*pi_next
; /* List of all phyints */
188 struct phyint
*pi_prev
; /* List of all phyints */
189 struct phyint
*pi_pgnext
; /* List of phyints in this group */
190 struct phyint
*pi_pgprev
; /* List of phyints in this group */
191 uint_t pi_ifindex
; /* interface index */
192 enum pi_state pi_state
; /* State of the phyint */
193 uint64_t pi_flags
; /* Phyint flags from kernel */
194 uint16_t pi_icmpid
; /* icmp id in icmp echo request */
195 uint64_t pi_taddrthresh
; /* time (in secs) to delay logging */
196 /* about missing test addresses */
197 dlpi_handle_t pi_dh
; /* DLPI handle to underlying link */
198 uint_t pi_notes
; /* enabled DLPI notifications */
199 uchar_t pi_hwaddr
[DLPI_PHYSADDR_MAX
]; /* phyint's hw address */
200 size_t pi_hwaddrlen
; /* phyint's hw address length */
203 * The pi_whenup array is a circular buffer of the most recent
204 * times (in milliseconds since some arbitrary point of time in
205 * the past) that the interface was brought up; pi_whendx identifies
206 * the oldest element of the array.
208 uint_t pi_whenup
[LINK_UP_PERMIN
];
209 unsigned int pi_whendx
;
212 pi_taddrmsg_printed
: 1, /* testaddr msg printed */
213 pi_duptaddrmsg_printed
: 1, /* dup testaddr msg printed */
214 pi_cfgmsg_printed
: 1, /* bad config msg printed */
215 pi_lfmsg_printed
: 1, /* link-flapping msg printed */
216 pi_link_state
: 1, /* interface link state */
217 pi_hwaddrdup
: 1; /* disabled due to dup hw address */
221 * A doubly linked list of all phyint_instances each of which contains a
222 * doubly linked list of logical interfaces and targets. For eg. if both
223 * IPv4 and IPv6 are used over hme0, we have 2 phyint instances, 1 for each
226 struct phyint_instance
{
227 struct phyint_instance
*pii_next
; /* List of all phyint insts */
228 struct phyint_instance
*pii_prev
; /* List of all phyint insts */
230 struct phyint
*pii_phyint
; /* Back pointer to the phyint */
231 struct target
*pii_targets
; /* List of targets on this link */
232 struct logint
*pii_probe_logint
; /* IFF_NOFAILOVER addr for probing */
233 struct logint
*pii_logint
; /* Doubly linked list of logical ifs */
235 int pii_probe_sock
; /* Socket for ICMP Probe packets */
236 int pii_af
; /* Address family */
237 uint16_t pii_rack
; /* highest acknowledged seq number */
238 uint16_t pii_snxt
; /* sequence number of next probe */
239 uint_t pii_snxt_time
; /* actual next probe time that */
240 /* includes some randomness */
242 uint_t pii_snxt_basetime
; /* strictly periodic base probe time */
243 /* for all periodic probes */
244 uint_t pii_fd_snxt_basetime
; /* strictly periodic base probe time */
245 /* for failure detection probes */
247 hrtime_t pii_fd_hrtime
; /* hrtime_t before which we should */
248 /* not send probes out this pii */
250 uint64_t pii_flags
; /* Phyint flags from kernel */
253 uint_t pr_id
; /* Full ID of probe */
254 struct target
*pr_target
; /* Probe Target */
255 uint_t pr_time_lost
; /* Time probe declared lost */
256 struct timeval pr_tv_sent
; /* Wall time probe was sent */
257 hrtime_t pr_hrtime_start
; /* hrtime probe op started */
258 hrtime_t pr_hrtime_sent
; /* hrtime probe was sent */
259 hrtime_t pr_hrtime_ackrecv
; /* hrtime probe ack received */
260 hrtime_t pr_hrtime_ackproc
; /* hrtime probe ack processed */
261 uint_t pr_status
; /* probe status as below */
262 #define PR_UNUSED 0 /* Probe slot unused */
263 #define PR_UNACKED 1 /* Probe is unacknowledged */
264 #define PR_ACKED 2 /* Probe has been acknowledged */
265 #define PR_LOST 3 /* Probe is declared lost */
266 } pii_probes
[PROBE_STATS_COUNT
];
269 pii_in_use
: 1, /* To detect removed phyints */
270 pii_basetime_inited
: 1, /* probe time initialized */
271 pii_targets_are_routers
: 1; /* routers or hosts ? */
273 uint_t pii_probe_next
; /* next index to use in pii_probes[] */
274 struct target
*pii_target_next
; /* next target for probing */
275 struct target
*pii_rtt_target_next
;
276 /* next target for rtt probes */
278 int pii_ntargets
; /* Number of active targets */
279 struct stats
{ /* Cumulative statistics */
280 uint64_t lost
; /* Number of probes lost */
281 uint64_t acked
; /* Number of probes acked */
282 uint64_t sent
; /* Number of probes sent */
283 uint64_t unknown
; /* Number of ambiguous */
/*
 * Shorthands that let phyint-wide attributes be named as if they were
 * members of the phyint instance; each goes through the pii_phyint back
 * pointer.
 */
#define	pii_name	pii_phyint->pi_name
#define	pii_ifindex	pii_phyint->pi_ifindex
#define	pii_state	pii_phyint->pi_state
#define	pii_icmpid	pii_phyint->pi_icmpid

/* A probe status is valid as long as it does not exceed PR_LOST. */
#define	PR_STATUS_VALID(status)	(!((status) > PR_LOST))
297 * A doubly linked list of prefixes or logicals, hanging off the
301 struct logint
*li_next
; /* Next logint of this phyint inst. */
302 struct logint
*li_prev
; /* Prev logint of this phyint inst. */
303 struct phyint_instance
*li_phyint_inst
;
304 /* Back pointer to phyint inst. */
306 char li_name
[LIFNAMSIZ
+ 1]; /* name Eg. hme0:1 */
307 struct in6_addr li_addr
; /* IP address */
308 struct in6_addr li_dstaddr
; /* Dst IP address for pointopoint */
309 struct in6_addr li_subnet
; /* prefix / subnet */
310 uint_t li_subnet_len
; /* prefix / subnet length */
311 uint64_t li_flags
; /* IFF_* flags */
313 li_in_use
: 1, /* flag to detect deleted logints */
314 li_dupaddr
: 1; /* test address is not unique */
319 * Doubly-linked list of probe targets on a phyint instance. Probe targets are
320 * usually onlink routers. If no onlink routers can be found, onlink hosts
324 struct target
*tg_next
; /* Next target for this phyint inst. */
325 struct target
*tg_prev
; /* Prev target for this phyint inst. */
326 struct phyint_instance
*tg_phyint_inst
;
327 /* Back pointer to phyint instance */
329 struct in6_addr tg_address
; /* Target IP address */
330 int tg_status
; /* Status of the target below */
331 #define TG_ACTIVE 1 /* active probe target */
332 #define TG_UNUSED 2 /* target not in use now */
333 #define TG_SLOW 3 /* rtt is high - Not in use now */
334 #define TG_DEAD 4 /* Target is not responding */
336 hrtime_t tg_latime
; /* Target's last active time */
337 int64_t tg_rtt_sa
; /* Scaled RTT average (in ns) */
338 int64_t tg_rtt_sd
; /* Scaled RTT deviation (in ns) */
339 int tg_crtt
; /* Conservative RTT = A + 4D (in ms) */
341 tg_in_use
: 1; /* In use flag */
342 int64_t tg_deferred
[MAXDEFERREDRTT
+ 1];
343 /* Deferred rtt data points */
345 /* Number of deferred rtt data points */
/* A target status is valid when it lies within [TG_ACTIVE, TG_DEAD]. */
#define	TG_STATUS_VALID(status)	\
	(!(((status) < TG_ACTIVE) || ((status) > TG_DEAD)))
352 * Statistics about consecutive probe failures are passed around between
353 * functions in this structure.
355 struct probe_fail_count
357 uint_t pf_tff
; /* Earliest time of failure in a series */
358 int pf_nfail
; /* Number of consecutive probe failures */
359 int pf_nfail_tg
; /* Number of consecutive probe fails for */
360 /* some given target 'tg' */
364 * Statistics about consecutive probe successes is passed around between
365 * functions in this structure.
367 struct probe_success_count
369 uint_t ps_tls
; /* Most recent time of probe success */
370 boolean_t ps_tls_valid
; /* is ps_tls valid */
371 int ps_nsucc
; /* Number of consecutive probe successes */
372 /* starting from the most recent */
373 int ps_nsucc_tg
; /* Number of consecutive probe successes */
374 /* for some given target 'tg' */
378 * Statistics about missed probes that were never sent.
379 * Happens due to scheduling delay.
384 uint_t pm_nprobes
; /* Cumulative number of missed probes */
385 uint_t pm_ntimes
; /* Total number of occasions */
388 typedef struct addrlist
{
389 struct addrlist
*al_next
; /* next address */
390 char al_name
[LIFNAMSIZ
]; /* address lif name */
391 uint64_t al_flags
; /* address flags */
392 struct sockaddr_storage al_addr
; /* address */
398 extern addrlist_t
*localaddrs
;
399 /* List of all local addresses, including local zones */
400 extern struct phyint
*phyints
; /* List of all phyints */
401 extern struct phyint_group
*phyint_groups
; /* List of all phyint groups */
402 extern struct phyint_group
*phyint_anongroup
; /* Pointer to the anon group */
403 extern struct phyint_instance
*phyint_instances
;
404 /* List of all phyint instances */
405 extern struct probes_missed probes_missed
;
406 /* statistics about missed probes */
/*
 * Function prototypes
 */
411 extern int phyint_init(void);
412 extern struct phyint
*phyint_lookup(const char *name
);
413 extern struct phyint_instance
*phyint_inst_lookup(int af
, char *name
);
414 extern struct phyint_instance
*phyint_inst_init_from_k(int af
, char *name
);
415 extern struct phyint_instance
*phyint_inst_other(struct phyint_instance
*pii
);
416 extern int phyint_inst_update_from_k(struct phyint_instance
*pii
);
417 extern void phyint_inst_delete(struct phyint_instance
*pii
);
418 extern uint_t
phyint_inst_timer(struct phyint_instance
*pii
);
419 extern boolean_t
phyint_inst_sockinit(struct phyint_instance
*pii
);
421 extern void phyint_changed(struct phyint
*pi
);
422 extern void phyint_chstate(struct phyint
*pi
, enum pi_state state
);
423 extern void phyint_group_chstate(struct phyint_group
*pg
, enum pg_state state
);
424 extern struct phyint_group
*phyint_group_create(const char *pg_name
);
425 extern struct phyint_group
*phyint_group_lookup(const char *pg_name
);
426 extern void phyint_group_insert(struct phyint_group
*pg
);
427 extern void phyint_group_delete(struct phyint_group
*pg
);
428 extern void phyint_group_refresh_state(struct phyint_group
*pg
);
429 extern void phyint_standby_refresh_inactive(struct phyint
*pi
);
430 extern void phyint_check_for_repair(struct phyint
*pi
);
431 extern void phyint_transition_to_running(struct phyint
*pi
);
432 extern void phyint_activate_another(struct phyint
*pi
);
433 extern int phyint_offline(struct phyint
*pi
, unsigned int);
434 extern int phyint_undo_offline(struct phyint
*pi
);
435 extern boolean_t
phyint_is_functioning(struct phyint
*pi
);
437 extern void logint_init_from_k(struct phyint_instance
*pii
, char *li_name
);
438 extern void logint_delete(struct logint
*li
);
440 extern struct target
*target_lookup(struct phyint_instance
*pii
,
441 struct in6_addr addr
);
442 extern void target_create(struct phyint_instance
*pii
,
443 struct in6_addr addr
, boolean_t is_router
);
444 extern void target_delete(struct target
*tg
);
445 extern struct target
*target_next(struct target
*tg
);
446 extern void target_add(struct phyint_instance
*pii
, struct in6_addr addr
,
447 boolean_t is_router
);
449 extern void in_data(struct phyint_instance
*pii
);
450 extern void in6_data(struct phyint_instance
*pii
);
452 extern void logperror_pii(struct phyint_instance
*pii
, const char *str
);
453 extern void logperror_li(struct logint
*li
, const char *str
);
454 extern char *pr_addr(int af
, struct in6_addr addr
, char *abuf
, int len
);
455 extern void addr2storage(int af
, const struct in6_addr
*addr
,
456 struct sockaddr_storage
*ssp
);
457 extern void phyint_inst_print_all(void);
458 extern boolean_t
prefix_equal(struct in6_addr
, struct in6_addr
, uint_t
);
460 extern void reset_crtt_all(struct phyint
*pi
);
461 extern int failure_state(struct phyint_instance
*pii
);
462 extern void process_link_state_changes(void);
463 extern void clear_pii_probe_stats(struct phyint_instance
*pii
);
464 extern void start_timer(struct phyint_instance
*pii
);
465 extern void stop_probing(struct phyint
*pi
);
467 extern boolean_t
own_address(struct in6_addr addr
);
468 extern boolean_t
change_pif_flags(struct phyint
*pi
, uint64_t set
,
471 extern void close_probe_socket(struct phyint_instance
*pii
, boolean_t flag
);
472 extern int probe_state_event(struct probe_stats
*, struct phyint_instance
*);
473 extern void probe_chstate(struct probe_stats
*, struct phyint_instance
*, int);
475 extern unsigned int getgraddrinfo(const char *, struct sockaddr_storage
*,
477 extern unsigned int getifinfo(const char *, ipmp_ifinfo_t
**);
478 extern unsigned int getgroupinfo(const char *, ipmp_groupinfo_t
**);
479 extern unsigned int getgrouplist(ipmp_grouplist_t
**);
480 extern unsigned int getsnap(ipmp_snap_t
**);
482 extern boolean_t
addrlist_add(addrlist_t
**, const char *, uint64_t,
483 struct sockaddr_storage
*);
484 extern void addrlist_free(addrlist_t
**);
490 #endif /* _MPD_TABLES_H */