etc/services - sync with NetBSD-8
[minix.git] / minix / net / lwip / rtsock.c
blob7af8bb296278037024b35476bb6bbde63b2a7f24
1 /* LWIP service - rtsock.c - routing sockets and route sysctl support */
2 /*
3 * In a nutshell, the intended abstraction is that only this module deals with
4 * route messages, message headers, and RTA arrays, whereas other modules
5 * (ifaddr, route) are responsible for parsing and providing sockaddr_* type
6 * addresses, with the exception of compression and expansion which is
7 * particular to routing sockets. Concretely, there should be no reference to
8 * (e.g.) rt_msghdr outside this module, and no mention of ip_addr_t inside it.
9 */
11 #include "lwip.h"
12 #include "ifaddr.h"
13 #include "rtsock.h"
14 #include "route.h"
15 #include "lldata.h"
/* How many routing sockets can exist at the same time. */
#define NR_RTSOCK	8

/*
 * The largest request that can be sent equals the send buffer maximum.  A
 * request can be at most one routing message header followed by RTAX_MAX
 * socket addresses of maximum size, each including alignment padding.  That
 * currently adds up to a number in the low 400s, so 512 is sufficient for
 * now.  Changing the send buffer size is not supported, as there is no real
 * point in doing so; this is also why there is no RT_SNDBUF_{MIN,DEF}.
 */
#define RT_SNDBUF_MAX	512	/* maximum RT send buffer size */

#define RT_RCVBUF_MIN	0	/* minimum RT receive buffer size */
#define RT_RCVBUF_DEF	16384	/* default RT receive buffer size */
#define RT_RCVBUF_MAX	65536	/* maximum RT receive buffer size */

/*
 * Length of the socket address reported for routing sockets: only the part
 * of struct sockaddr that precedes sa_data, which is two bytes.
 */
#define RTSOCK_ADDR_LEN	offsetof(struct sockaddr, sa_data)
37 struct rtsock_rta {
38 const void *rta_ptr[RTAX_MAX];
39 socklen_t rta_len[RTAX_MAX];
42 static const char rtsock_padbuf[RT_ROUNDUP(0)];
44 static struct rtsock {
45 struct sock rt_sock; /* socket object, MUST be first */
46 int rt_family; /* address family filter if not zero */
47 unsigned int rt_flags; /* routing socket flags (RTF_) */
48 struct pbuf *rt_rcvhead; /* receive buffer, first packet */
49 struct pbuf **rt_rcvtailp; /* receive buffer, last ptr-ptr */
50 size_t rt_rcvlen; /* receive buffer, length in bytes */
51 size_t rt_rcvbuf; /* receive buffer, maximum size */
52 TAILQ_ENTRY(rtsock) rt_next; /* next in active or free list */
53 } rt_array[NR_RTSOCK];
55 #define RTF_NOLOOPBACK 0x1 /* suppress reply messages */
57 static TAILQ_HEAD(, rtsock) rt_freelist; /* free routing sockets */
58 static TAILQ_HEAD(, rtsock) rt_activelist; /* active routing sockets */
/*
 * Information about a request in progress, handed to the route module so
 * that a reply can be generated for it.
 */
struct rtsock_request {
	struct rtsock *rtr_src;		/* source socket of the request */
	pid_t rtr_pid;			/* process ID of requesting process */
	int rtr_seq;			/* sequence number from the request */
	int rtr_getif;			/* RTM_GET only: get interface info */
};
67 static const struct sockevent_ops rtsock_ops;
69 static ssize_t rtsock_info(struct rmib_call *, struct rmib_node *,
70 struct rmib_oldp *, struct rmib_newp *);
72 /* The CTL_NET PF_ROUTE subtree. */
73 static struct rmib_node net_route_table[] = {
74 [0] = RMIB_FUNC(RMIB_RO | CTLTYPE_NODE, 0, rtsock_info,
75 "rtable", "Routing table information"),
78 /* The CTL_NET PF_ROUTE node. */
79 static struct rmib_node net_route_node =
80 RMIB_NODE(RMIB_RO, net_route_table, "route", "PF_ROUTE information");
83 * Initialize the routing sockets module.
85 void
86 rtsock_init(void)
88 const int mib[] = { CTL_NET, PF_ROUTE };
89 unsigned int slot;
90 int r;
92 /* Initialize the list of free routing sockets. */
93 TAILQ_INIT(&rt_freelist);
95 for (slot = 0; slot < __arraycount(rt_array); slot++)
96 TAILQ_INSERT_TAIL(&rt_freelist, &rt_array[slot], rt_next);
98 /* Initialize the list of acive routing sockets. */
99 TAILQ_INIT(&rt_activelist);
101 /* Register the "net.route" subtree with the MIB service. */
102 if ((r = rmib_register(mib, __arraycount(mib), &net_route_node)) != OK)
103 panic("unable to register net.route RMIB tree: %d", r);
107 * Allocate a pbuf suitable for storing a routing message of 'size' bytes.
108 * Return the allocated pbuf on success, or NULL on memory allocation failure.
110 static struct pbuf *
111 rtsock_alloc(size_t size)
113 struct pbuf *pbuf;
116 * The data will currently always fit in a single pool buffer. Just in
117 * case this changes in the future, warn and fail cleanly. The rest of
118 * the code is not able to deal with buffer chains as it is, although
119 * that can be changed if necessary.
121 if (size > MEMPOOL_BUFSIZE) {
122 printf("LWIP: routing socket packet too large (%zu)\n", size);
124 return NULL;
127 pbuf = pbuf_alloc(PBUF_RAW, size, PBUF_RAM);
129 assert(pbuf == NULL || pbuf->tot_len == pbuf->len);
131 return pbuf;
135 * Initialize a routing addresses map.
137 static void
138 rtsock_rta_init(struct rtsock_rta * rta)
141 memset(rta, 0, sizeof(*rta));
145 * Set an entry in a routing addresses map. When computing sizes, 'ptr' may be
146 * NULL.
148 static void
149 rtsock_rta_set(struct rtsock_rta * rta, unsigned int rtax, const void * ptr,
150 socklen_t len)
153 assert(rtax < RTAX_MAX);
155 rta->rta_ptr[rtax] = ptr;
156 rta->rta_len[rtax] = len;
160 * Copy out a message with a header and any entries in a routing addresses map,
161 * either into a pbuf allocated for this purpose, or to a RMIB (sysctl) caller,
162 * at the given offset. If no destination is given ('pbuf ' and 'oldp' are
163 * both NULL), compute just the size of the resulting data. Otherwise, set the
164 * length and address mask fields in the header as a side effect. Return the
165 * number of bytes copied on success, and if 'pbuf' is not NULL, it is filled
166 * with a pointer to the newly allocated pbuf. Return a negative error code on
167 * failure. Note that when computing the size only, any actual data pointers
168 * ('hdr', 'msglen', 'addrs', and the pointers in 'rta') may be NULL or even
169 * invalid, even though the corresponding sizes should still be supplied.
171 static ssize_t
172 rtsock_rta_finalize(void * hdr, size_t hdrlen, u_short * msglen, int * addrs,
173 const struct rtsock_rta * rta, struct pbuf ** pbuf,
174 struct rmib_oldp * oldp, ssize_t off)
176 iovec_t iov[1 + RTAX_MAX * 2];
177 size_t len, padlen, totallen;
178 unsigned int i, iovcnt;
179 int mask;
181 assert(pbuf == NULL || oldp == NULL);
182 assert(pbuf == NULL || off == 0);
183 assert(RT_ROUNDUP(hdrlen) == hdrlen);
185 iov[0].iov_addr = (vir_bytes)hdr;
186 iov[0].iov_size = hdrlen;
187 iovcnt = 1;
189 totallen = hdrlen;
190 mask = 0;
193 * The addresses in the given RTA map, as present, should be stored in
194 * the numbering order of the map.
196 for (i = 0; i < RTAX_MAX; i++) {
197 if (rta->rta_ptr[i] == NULL)
198 continue;
200 if ((len = rta->rta_len[i]) > 0) {
201 assert(iovcnt < __arraycount(iov));
202 iov[iovcnt].iov_addr = (vir_bytes)rta->rta_ptr[i];
203 iov[iovcnt++].iov_size = len;
206 /* Note that RT_ROUNDUP(0) is not 0.. */
207 if ((padlen = RT_ROUNDUP(len) - len) > 0) {
208 assert(iovcnt < __arraycount(iov));
209 iov[iovcnt].iov_addr = (vir_bytes)rtsock_padbuf;
210 iov[iovcnt++].iov_size = padlen;
213 totallen += len + padlen;
214 mask |= (1 << i); /* convert RTAX_ to RTA_ */
217 /* If only the length was requested, return it now. */
218 if (pbuf == NULL && oldp == NULL)
219 return totallen;
222 * Casting 'hdr' would violate C99 strict aliasing rules, but the
223 * address mask is not always at the same location anyway.
225 *msglen = totallen;
226 *addrs = mask;
228 if (pbuf != NULL) {
229 if ((*pbuf = rtsock_alloc(totallen)) == NULL)
230 return ENOMEM;
232 return util_coalesce((char *)(*pbuf)->payload, totallen, iov,
233 iovcnt);
234 } else
235 return rmib_vcopyout(oldp, off, iov, iovcnt);
239 * Reduce the size of a network mask to the bytes actually used. It is highly
240 * doubtful that this extra complexity pays off in any form, but it is what the
241 * BSDs historically do. We currently implement compression for IPv4 only.
243 static void
244 rtsock_compress_netmask(struct sockaddr * sa)
246 struct sockaddr_in sin;
247 uint32_t addr;
249 if (sa->sa_family != AF_INET)
250 return; /* nothing to do */
252 memcpy(&sin, sa, sizeof(sin)); /* no type punning.. (sigh) */
254 addr = htonl(sin.sin_addr.s_addr);
256 if (addr & 0x000000ff)
257 sa->sa_len = 8;
258 else if (addr & 0x0000ffff)
259 sa->sa_len = 7;
260 else if (addr & 0x00ffffff)
261 sa->sa_len = 6;
262 else if (addr != 0)
263 sa->sa_len = 5;
264 else
265 sa->sa_len = 0;
269 * Expand a possibly compressed IPv4 or IPv6 network mask, given as 'sa', into
270 * 'mask'. Return TRUE if expansion succeeded. In that case, the resulting
271 * mask must have sa.sa_len and sa.sa_family filled in correctly, and have the
272 * appropriate size for its address family. Return FALSE if expansion failed
273 * and an error should be returned to the caller.
275 static int
276 rtsock_expand_netmask(union sockaddr_any * mask, const struct sockaddr * sa)
279 if (sa->sa_len > sizeof(*mask))
280 return FALSE;
282 memset(mask, 0, sizeof(*mask));
283 memcpy(mask, sa, sa->sa_len);
286 * Amazingly, even the address family may be chopped off, in which case
287 * an IPv4 address is implied.
289 if (sa->sa_len >= offsetof(struct sockaddr, sa_data) &&
290 sa->sa_family == AF_INET6) {
291 if (sa->sa_len > sizeof(struct sockaddr_in6))
292 return FALSE;
294 mask->sa.sa_len = sizeof(struct sockaddr_in6);
295 mask->sa.sa_family = AF_INET6;
296 } else {
297 if (sa->sa_len > sizeof(struct sockaddr_in))
298 return FALSE;
300 mask->sa.sa_len = sizeof(struct sockaddr_in);
301 mask->sa.sa_family = AF_INET;
304 return TRUE;
308 * Create a routing socket.
310 sockid_t
311 rtsock_socket(int type, int protocol, struct sock ** sockp,
312 const struct sockevent_ops ** ops)
314 struct rtsock *rt;
317 * There is no superuser check here: regular users are allowed to issue
318 * (only) RTM_GET requests on routing sockets.
320 if (type != SOCK_RAW)
321 return EPROTOTYPE;
323 /* We could accept only the protocols we know, but this is fine too. */
324 if (protocol < 0 || protocol >= AF_MAX)
325 return EPROTONOSUPPORT;
327 if (TAILQ_EMPTY(&rt_freelist))
328 return ENOBUFS;
330 rt = TAILQ_FIRST(&rt_freelist);
331 TAILQ_REMOVE(&rt_freelist, rt, rt_next);
333 rt->rt_flags = 0;
334 rt->rt_family = protocol;
335 rt->rt_rcvhead = NULL;
336 rt->rt_rcvtailp = &rt->rt_rcvhead;
337 rt->rt_rcvlen = 0;
338 rt->rt_rcvbuf = RT_RCVBUF_DEF;
340 TAILQ_INSERT_HEAD(&rt_activelist, rt, rt_next);
342 *sockp = &rt->rt_sock;
343 *ops = &rtsock_ops;
344 return SOCKID_RT | (sockid_t)(rt - rt_array);
348 * Enqueue data on the receive queue of a routing socket. The caller must have
349 * checked whether the receive buffer size allows for the receipt of the data.
351 static void
352 rtsock_enqueue(struct rtsock * rt, struct pbuf * pbuf)
355 *rt->rt_rcvtailp = pbuf;
356 rt->rt_rcvtailp = pchain_end(pbuf);
357 rt->rt_rcvlen += pchain_size(pbuf);
359 sockevent_raise(&rt->rt_sock, SEV_RECV);
363 * Determine whether a routing message for address family 'family', originated
364 * from routing socket 'rtsrc' if not NULL, should be sent to routing socket
365 * 'rt'. Return TRUE if the message should be sent to this socket, or FALSE
366 * if it should not.
368 static int
369 rtsock_can_send(struct rtsock *rt, struct rtsock *rtsrc, int family)
372 /* Do not send anything on sockets shut down for reading. */
373 if (sockevent_is_shutdown(&rt->rt_sock, SFL_SHUT_RD))
374 return FALSE;
377 * Do not send a reply message to the source of the request if the
378 * source is not interested in replies to its own requests.
380 if (rt == rtsrc && (rt->rt_flags & RTF_NOLOOPBACK))
381 return FALSE;
384 * For address family specific messages, make sure the routing socket
385 * is interested in that family. Make an exception if the socket was
386 * the source of the request, though: we currently do not prevent user
387 * processes from issuing commands for the "wrong" family.
389 if (rt->rt_family != AF_UNSPEC && family != AF_UNSPEC &&
390 rt->rt_family != family && rt != rtsrc)
391 return FALSE;
394 * See whether the receive queue of the socket is already full. We do
395 * not consider the size of the current request, in order to not drop
396 * larger messages and then enqueue smaller ones.
398 if (rt->rt_rcvlen >= rt->rt_rcvbuf)
399 return FALSE;
401 /* All is well: go on and deliver the message. */
402 return TRUE;
406 * Send the routing message in 'pbuf' to the given routing socket if possible,
407 * or check whether such a message could be sent to that socket if 'pbuf' is
408 * NULL. In the former case, the function takes ownership of 'pbuf'. The
409 * given routing socket is assumed to be the source of the routing request that
410 * generated this message. In the latter case, the function returns TRUE if
411 * the socket would take the message or FALSE if not. If 'family' is not
412 * AF_UNSPEC, it is to be the address family of the message.
414 static int
415 rtsock_msg_one(struct rtsock * rt, int family, struct pbuf * pbuf)
418 if (rtsock_can_send(rt, rt, family)) {
419 if (pbuf != NULL)
420 rtsock_enqueue(rt, pbuf);
422 return TRUE;
423 } else {
424 if (pbuf != NULL)
425 pbuf_free(pbuf);
427 return FALSE;
432 * Send the routing message in 'pbuf' to all matching routing sockets, or check
433 * whether there are any such matching routing sockets if 'pbuf' is NULL. In
434 * the former case, the function takes ownership of 'pbuf'. In the latter
435 * case, the function returns TRUE if there are any matching sockets or FALSE
436 * if there are none. If 'rtsrc' is not NULL, it is to be the routing socket
437 * that is the source of the message. If 'family' is not AF_UNSPEC, it is to
438 * be the address family of the message.
440 static int
441 rtsock_msg_match(struct rtsock * rtsrc, int family, struct pbuf * pbuf)
443 struct rtsock *rt, *rtprev;
444 struct pbuf *pcopy;
446 rtprev = NULL;
448 TAILQ_FOREACH(rt, &rt_activelist, rt_next) {
449 if (!rtsock_can_send(rt, rtsrc, family))
450 continue;
453 * There is at least one routing socket that is interested in
454 * receiving this message, and able to receive it.
456 if (pbuf == NULL)
457 return TRUE;
460 * We need to make copies of the generated message for all but
461 * the last matching socket, which gets the original. If we're
462 * out of memory, free the original and stop: there are more
463 * important things to spend memory on than routing sockets.
465 if (rtprev != NULL) {
466 if ((pcopy = rtsock_alloc(pbuf->tot_len)) == NULL) {
467 pbuf_free(pbuf);
469 return TRUE;
472 if (pbuf_copy(pcopy, pbuf) != ERR_OK)
473 panic("unexpected pbuf copy failure");
475 rtsock_enqueue(rtprev, pcopy);
478 rtprev = rt;
481 if (rtprev != NULL)
482 rtsock_enqueue(rtprev, pbuf);
483 else if (pbuf != NULL)
484 pbuf_free(pbuf);
486 return (rtprev != NULL);
490 * Dequeue and free the head of the receive queue of a routing socket.
492 static void
493 rtsock_dequeue(struct rtsock * rt)
495 struct pbuf *pbuf, **pnext;
496 size_t size;
498 pbuf = rt->rt_rcvhead;
499 assert(pbuf != NULL);
501 pnext = pchain_end(pbuf);
502 size = pchain_size(pbuf);
504 if ((rt->rt_rcvhead = *pnext) == NULL)
505 rt->rt_rcvtailp = &rt->rt_rcvhead;
507 assert(rt->rt_rcvlen >= size);
508 rt->rt_rcvlen -= size;
510 *pnext = NULL;
511 pbuf_free(pbuf);
515 * Process a routing message sent on a socket. Return OK on success, in which
516 * case the caller assumes that the processing routine has sent a reply to the
517 * user and possibly other routing sockets. Return a negative error code on
518 * failure, in which case the caller will send the reply to the user instead.
520 static int
521 rtsock_process(struct rtsock *rt, struct rt_msghdr * rtm, char * buf,
522 size_t len, int is_root)
524 struct rtsock_request rtr;
525 struct rtsock_rta rta;
526 const struct sockaddr *netmask;
527 struct sockaddr sa;
528 union sockaddr_any mask;
529 size_t off;
530 int i;
532 if (rtm->rtm_msglen != len)
533 return EINVAL;
535 if (rtm->rtm_version != RTM_VERSION) {
536 printf("LWIP: PID %d uses routing sockets version %u\n",
537 rtm->rtm_pid, rtm->rtm_version);
539 return EPROTONOSUPPORT;
543 * Make sure that we won't misinterpret the rest of the message. While
544 * looking at the message type, also make sure non-root users can only
545 * ever issue RTM_GET requests.
547 switch (rtm->rtm_type) {
548 case RTM_ADD:
549 case RTM_DELETE:
550 case RTM_CHANGE:
551 case RTM_LOCK:
552 if (!is_root)
553 return EPERM;
555 /* FALLTHROUGH */
556 case RTM_GET:
557 break;
559 default:
560 return EOPNOTSUPP;
564 * Extract all given addresses. We do not actually support all types
565 * of entries, but we cannot skip the ones we do not need either.
567 rtsock_rta_init(&rta);
569 off = sizeof(*rtm);
570 assert(off == RT_ROUNDUP(off));
572 for (i = 0; i < RTAX_MAX; i++) {
573 if (!(rtm->rtm_addrs & (1 << i)))
574 continue;
576 if (off + offsetof(struct sockaddr, sa_data) > len)
577 return EINVAL;
580 * It is safe to access sa_len and even sa_family in all cases,
581 * in particular even when the structure is of size zero.
583 assert(offsetof(struct sockaddr, sa_data) <= RT_ROUNDUP(0));
585 memcpy(&sa, &buf[off], offsetof(struct sockaddr, sa_data));
587 if (off + sa.sa_len > len)
588 return EINVAL;
590 rtsock_rta_set(&rta, i, &buf[off], sa.sa_len);
592 off += RT_ROUNDUP((size_t)sa.sa_len);
596 * Expand the given netmask if it is in compressed IPv4 form. We do
597 * this here because it is particular to routing sockets; we also do
598 * the compression in this module. Note how the compression may even
599 * strip off the address family; really, who came up with this ****?
601 netmask = (const struct sockaddr *)rta.rta_ptr[RTAX_NETMASK];
603 if (netmask != NULL) {
604 if (!rtsock_expand_netmask(&mask, netmask))
605 return EINVAL;
607 rtsock_rta_set(&rta, RTAX_NETMASK, &mask, mask.sa.sa_len);
611 * Actually process the command. Pass on enough information so that a
612 * reply can be generated on success. The abstraction as sketched at
613 * the top of the file imposes that we pass quite a few parameters.
615 rtr.rtr_src = rt;
616 rtr.rtr_pid = rtm->rtm_pid;
617 rtr.rtr_seq = rtm->rtm_seq;
618 rtr.rtr_getif = (rtm->rtm_type == RTM_GET &&
619 (rta.rta_ptr[RTAX_IFP] != NULL || rta.rta_ptr[RTAX_IFA] != NULL));
621 return route_process(rtm->rtm_type,
622 (const struct sockaddr *)rta.rta_ptr[RTAX_DST],
623 (const struct sockaddr *)rta.rta_ptr[RTAX_NETMASK],
624 (const struct sockaddr *)rta.rta_ptr[RTAX_GATEWAY],
625 (const struct sockaddr *)rta.rta_ptr[RTAX_IFP],
626 (const struct sockaddr *)rta.rta_ptr[RTAX_IFA],
627 rtm->rtm_flags, rtm->rtm_inits, &rtm->rtm_rmx, &rtr);
631 * Perform preliminary checks on a send request.
633 static int
634 rtsock_pre_send(struct sock * sock __unused, size_t len,
635 socklen_t ctl_len __unused, const struct sockaddr * addr,
636 socklen_t addr_len __unused, endpoint_t user_endpt __unused, int flags)
639 if (flags != 0)
640 return EOPNOTSUPP;
642 if (addr != NULL)
643 return EISCONN;
646 * For the most basic failures - that is, we cannot even manage to
647 * receive the request - we do not generate a reply message.
649 if (len < sizeof(struct rt_msghdr))
650 return ENOBUFS;
651 if (len > RT_SNDBUF_MAX)
652 return EMSGSIZE;
654 return OK;
658 * Send data on a routing socket.
660 static int
661 rtsock_send(struct sock * sock, const struct sockdriver_data * data,
662 size_t len, size_t * offp, const struct sockdriver_data * ctl __unused,
663 socklen_t ctl_len __unused, socklen_t * ctl_off __unused,
664 const struct sockaddr * addr __unused, socklen_t addr_len __unused,
665 endpoint_t user_endpt, int flags __unused, size_t min __unused)
667 struct rtsock *rt = (struct rtsock *)sock;
668 char buf[RT_SNDBUF_MAX] __aligned(4);
669 struct rt_msghdr rtm;
670 struct pbuf *pbuf;
671 uid_t euid;
672 int r, is_root;
674 /* Copy in the request, and adjust some fields right away. */
675 assert(len >= sizeof(rtm));
676 assert(len <= sizeof(buf));
678 if ((r = sockdriver_copyin(data, 0, buf, len)) != OK)
679 return r;
681 memcpy(&rtm, buf, sizeof(rtm));
682 rtm.rtm_errno = 0;
683 rtm.rtm_flags &= ~RTF_DONE;
684 rtm.rtm_pid = getepinfo(user_endpt, &euid, NULL /*gid*/);
686 is_root = (euid == ROOT_EUID);
688 /* Process the request. */
689 r = rtsock_process(rt, &rtm, buf, len, is_root);
692 * If the request has been processed successfully, a reply has been
693 * sent already, possibly also to other routing sockets. Here, we
694 * handle the case that the request has resulted in failure, in which
695 * case we send a reply to the caller only. This behavior is different
696 * from the traditional BSD behavior, which also sends failure replies
697 * to other sockets. Our motivation is that while other parties are
698 * never going to be interested in failures anyway, it is in fact easy
699 * for an unprivileged user process to abuse the failure-reply system
700 * in order to fake other types of routing messages (e.g., RTM_IFINFO)
701 * to other parties. By sending failure replies only to the requestor,
702 * we eliminate the need for security-sensitive request validation.
704 if (r != OK && rtsock_can_send(rt, rt, AF_UNSPEC)) {
705 rtm.rtm_errno = -r;
707 if ((pbuf = rtsock_alloc(len)) == NULL)
708 return ENOMEM;
710 /* For the reply, reuse the request message largely as is. */
711 memcpy(pbuf->payload, &rtm, sizeof(rtm));
712 if (len > sizeof(rtm))
713 memcpy((uint8_t *)pbuf->payload + sizeof(rtm),
714 buf + sizeof(rtm), len - sizeof(rtm));
716 rtsock_enqueue(rt, pbuf);
717 } else if (r == OK)
718 *offp = len;
720 return r;
724 * Perform preliminary checks on a receive request.
726 static int
727 rtsock_pre_recv(struct sock * sock __unused, endpoint_t user_endpt __unused,
728 int flags)
732 * We accept the same flags across all socket types in LWIP, and then
733 * simply ignore the ones we do not support for routing sockets.
735 if ((flags & ~(MSG_PEEK | MSG_WAITALL)) != 0)
736 return EOPNOTSUPP;
738 return OK;
742 * Receive data on a routing socket.
744 static int
745 rtsock_recv(struct sock * sock, const struct sockdriver_data * data,
746 size_t len, size_t * off, const struct sockdriver_data * ctl __unused,
747 socklen_t ctl_len __unused, socklen_t * ctl_off __unused,
748 struct sockaddr * addr, socklen_t * addr_len,
749 endpoint_t user_endpt __unused, int flags, size_t min __unused,
750 int * rflags)
752 struct rtsock *rt = (struct rtsock *)sock;
753 struct pbuf *pbuf;
754 int r;
756 if ((pbuf = rt->rt_rcvhead) == NULL)
757 return SUSPEND;
759 /* Copy out the data to the calling user process. */
760 if (len >= pbuf->tot_len)
761 len = pbuf->tot_len;
762 else
763 *rflags |= MSG_TRUNC;
765 r = util_copy_data(data, len, 0, pbuf, 0, FALSE /*copy_in*/);
767 if (r != OK)
768 return r;
770 /* Generate a dummy source address. */
771 addr->sa_len = RTSOCK_ADDR_LEN;
772 addr->sa_family = AF_ROUTE;
773 *addr_len = RTSOCK_ADDR_LEN;
775 /* Discard the data now, unless we were instructed to peek only. */
776 if (!(flags & MSG_PEEK))
777 rtsock_dequeue(rt);
779 /* Return the received part of the data length. */
780 *off = len;
781 return OK;
785 * Test whether data can be received on a routing socket, and if so, how many
786 * bytes of data.
788 static int
789 rtsock_test_recv(struct sock * sock, size_t min __unused, size_t * size)
791 struct rtsock *rt = (struct rtsock *)sock;
793 if (rt->rt_rcvhead == NULL)
794 return SUSPEND;
796 if (size != NULL)
797 *size = rt->rt_rcvhead->tot_len;
798 return OK;
802 * Set socket options on a routing socket.
804 static int
805 rtsock_setsockopt(struct sock * sock, int level, int name,
806 const struct sockdriver_data * data, socklen_t len)
808 struct rtsock *rt = (struct rtsock *)sock;
809 int r, val;
811 if (level == SOL_SOCKET) {
812 switch (name) {
813 case SO_USELOOPBACK:
814 if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
815 len)) != OK)
816 return r;
818 if (!val)
819 rt->rt_flags |= RTF_NOLOOPBACK;
820 else
821 rt->rt_flags &= ~RTF_NOLOOPBACK;
823 return OK;
825 case SO_RCVBUF:
826 if ((r = sockdriver_copyin_opt(data, &val, sizeof(val),
827 len)) != OK)
828 return r;
830 if (val < RT_RCVBUF_MIN || val > RT_RCVBUF_MAX)
831 return EINVAL;
833 rt->rt_rcvbuf = (size_t)val;
835 return OK;
839 return ENOPROTOOPT;
843 * Retrieve socket options on a routing socket.
845 static int
846 rtsock_getsockopt(struct sock * sock, int level, int name,
847 const struct sockdriver_data * data, socklen_t * len)
849 struct rtsock *rt = (struct rtsock *)sock;
850 int val;
852 if (level == SOL_SOCKET) {
853 switch (name) {
854 case SO_USELOOPBACK:
855 val = !(rt->rt_flags & RTF_NOLOOPBACK);
857 return sockdriver_copyout_opt(data, &val, sizeof(val),
858 len);
860 case SO_RCVBUF:
861 val = rt->rt_rcvbuf;
863 return sockdriver_copyout_opt(data, &val, sizeof(val),
864 len);
868 return ENOPROTOOPT;
872 * Retrieve the local or remote socket address of a routing socket.
874 static int
875 rtsock_getname(struct sock * sock __unused, struct sockaddr * addr,
876 socklen_t * addr_len)
879 /* This is entirely useless but apparently common between OSes. */
880 addr->sa_len = RTSOCK_ADDR_LEN;
881 addr->sa_family = AF_ROUTE;
882 *addr_len = RTSOCK_ADDR_LEN;
884 return OK;
888 * Drain the receive queue of a routing socket.
890 static void
891 rtsock_drain(struct rtsock * rt)
894 while (rt->rt_rcvhead != NULL)
895 rtsock_dequeue(rt);
899 * Shut down a routing socket for reading and/or writing.
901 static int
902 rtsock_shutdown(struct sock * sock, unsigned int mask)
904 struct rtsock *rt = (struct rtsock *)sock;
906 if (mask & SFL_SHUT_RD)
907 rtsock_drain(rt);
909 return OK;
913 * Close a routing socket.
915 static int
916 rtsock_close(struct sock * sock, int force __unused)
918 struct rtsock *rt = (struct rtsock *)sock;
920 rtsock_drain(rt);
922 return OK;
926 * Free up a closed routing socket.
928 static void
929 rtsock_free(struct sock * sock)
931 struct rtsock *rt = (struct rtsock *)sock;
933 TAILQ_REMOVE(&rt_activelist, rt, rt_next);
935 TAILQ_INSERT_HEAD(&rt_freelist, rt, rt_next);
938 static const struct sockevent_ops rtsock_ops = {
939 .sop_pre_send = rtsock_pre_send,
940 .sop_send = rtsock_send,
941 .sop_pre_recv = rtsock_pre_recv,
942 .sop_recv = rtsock_recv,
943 .sop_test_recv = rtsock_test_recv,
944 .sop_setsockopt = rtsock_setsockopt,
945 .sop_getsockopt = rtsock_getsockopt,
946 .sop_getsockname = rtsock_getname,
947 .sop_getpeername = rtsock_getname,
948 .sop_shutdown = rtsock_shutdown,
949 .sop_close = rtsock_close,
950 .sop_free = rtsock_free
954 * Send an interface announcement message about the given interface. If
955 * 'arrival' is set, the interface has just been created; otherwise, the
956 * interface is about to be destroyed.
958 void
959 rtsock_msg_ifannounce(struct ifdev * ifdev, int arrival)
961 struct if_announcemsghdr ifan;
962 struct pbuf *pbuf;
964 if (!rtsock_msg_match(NULL /*rtsrc*/, AF_UNSPEC, NULL /*pbuf*/))
965 return;
967 memset(&ifan, 0, sizeof(ifan));
968 ifan.ifan_msglen = sizeof(ifan);
969 ifan.ifan_version = RTM_VERSION;
970 ifan.ifan_type = RTM_IFANNOUNCE;
971 ifan.ifan_index = ifdev_get_index(ifdev);
972 strlcpy(ifan.ifan_name, ifdev_get_name(ifdev), sizeof(ifan.ifan_name));
973 ifan.ifan_what = (arrival) ? IFAN_ARRIVAL : IFAN_DEPARTURE;
975 if ((pbuf = rtsock_alloc(sizeof(ifan))) == NULL)
976 return;
977 memcpy(pbuf->payload, &ifan, sizeof(ifan));
979 rtsock_msg_match(NULL /*rtsrc*/, AF_UNSPEC, pbuf);
983 * Send an interface information routing message.
985 void
986 rtsock_msg_ifinfo(struct ifdev * ifdev)
988 struct if_msghdr ifm;
989 struct pbuf *pbuf;
991 if (!rtsock_msg_match(NULL /*rtsrc*/, AF_UNSPEC, NULL /*pbuf*/))
992 return;
994 memset(&ifm, 0, sizeof(ifm));
995 ifm.ifm_msglen = sizeof(ifm);
996 ifm.ifm_version = RTM_VERSION;
997 ifm.ifm_type = RTM_IFINFO;
998 ifm.ifm_addrs = 0;
999 ifm.ifm_flags = ifdev_get_ifflags(ifdev);
1000 ifm.ifm_index = ifdev_get_index(ifdev);
1001 memcpy(&ifm.ifm_data, ifdev_get_ifdata(ifdev), sizeof(ifm.ifm_data));
1003 if ((pbuf = rtsock_alloc(sizeof(ifm))) == NULL)
1004 return;
1005 memcpy(pbuf->payload, &ifm, sizeof(ifm));
1007 rtsock_msg_match(NULL /*rtsrc*/, AF_UNSPEC, pbuf);
1011 * Set up a RTA map and an interface address structure for use in a RTM_xxxADDR
1012 * routing message.
1014 static void
1015 rtsock_rta_init_ifam(struct rtsock_rta * rta, struct ifa_msghdr * ifam,
1016 struct ifdev * ifdev, unsigned int type, struct sockaddr_dlx * sdlx)
1019 memset(ifam, 0, sizeof(*ifam));
1020 ifam->ifam_version = RTM_VERSION;
1021 ifam->ifam_type = type;
1022 ifam->ifam_flags = 0;
1023 ifam->ifam_index = ifdev_get_index(ifdev);
1024 ifam->ifam_metric = ifdev_get_metric(ifdev);
1026 rtsock_rta_init(rta);
1028 ifaddr_dl_get(ifdev, (ifaddr_dl_num_t)0, sdlx);
1030 rtsock_rta_set(rta, RTAX_IFP, sdlx, sdlx->sdlx_len);
1034 * Add a specific link-layer address for an interface to the given RTA map.
1036 static void
1037 rtsock_rta_add_dl(struct rtsock_rta * rta, struct ifdev * ifdev,
1038 ifaddr_dl_num_t num, struct sockaddr_dlx * sdlx)
1041 /* Obtain the address data. */
1042 ifaddr_dl_get(ifdev, num, sdlx);
1044 /* Add the interface address. */
1045 rtsock_rta_set(rta, RTAX_IFA, sdlx, sdlx->sdlx_len);
1048 * NetBSD also adds a RTAX_NETMASK entry here. At this moment it is
1049 * not clear to me why, and it is a pain to make, so for now we do not.
1054 * Send a routing message about a new, changed, or deleted datalink address for
1055 * the given interface.
1057 void
1058 rtsock_msg_addr_dl(struct ifdev * ifdev, unsigned int type,
1059 ifaddr_dl_num_t num)
1061 struct rtsock_rta rta;
1062 struct ifa_msghdr ifam;
1063 struct sockaddr_dlx name, addr;
1064 struct pbuf *pbuf;
1066 if (!rtsock_msg_match(NULL /*rtsrc*/, AF_LINK, NULL /*pbuf*/))
1067 return;
1069 rtsock_rta_init_ifam(&rta, &ifam, ifdev, type, &name);
1071 rtsock_rta_add_dl(&rta, ifdev, num, &addr);
1073 if (rtsock_rta_finalize(&ifam, sizeof(ifam), &ifam.ifam_msglen,
1074 &ifam.ifam_addrs, &rta, &pbuf, NULL, 0) > 0)
1075 rtsock_msg_match(NULL /*rtsrc*/, AF_LINK, pbuf);
1079 * Add a specific IPv4 address for an interface to the given RTA map.
1081 static void
1082 rtsock_rta_add_v4(struct rtsock_rta * rta, struct ifdev * ifdev,
1083 ifaddr_v4_num_t num, struct sockaddr_in sin[4])
1086 /* Obtain the address data. */
1087 (void)ifaddr_v4_get(ifdev, num, &sin[0], &sin[1], &sin[2], &sin[3]);
1089 /* Add the interface address. */
1090 rtsock_rta_set(rta, RTAX_IFA, &sin[0], sin[0].sin_len);
1092 /* Add the netmask, after compressing it. */
1093 rtsock_compress_netmask((struct sockaddr *)&sin[1]);
1095 rtsock_rta_set(rta, RTAX_NETMASK, &sin[1], sin[1].sin_len);
1097 /* Possibly add a broadcast or destination address. */
1098 if (sin[2].sin_len != 0)
1099 rtsock_rta_set(rta, RTAX_BRD, &sin[2], sin[2].sin_len);
1100 else if (sin[3].sin_len != 0)
1101 rtsock_rta_set(rta, RTAX_DST, &sin[3], sin[3].sin_len);
1105 * Send a routing message about a new or deleted IPv4 address for the given
1106 * interface.
1108 void
1109 rtsock_msg_addr_v4(struct ifdev * ifdev, unsigned int type,
1110 ifaddr_v4_num_t num)
1112 struct rtsock_rta rta;
1113 struct ifa_msghdr ifam;
1114 struct sockaddr_dlx name;
1115 struct sockaddr_in sin[4];
1116 struct pbuf *pbuf;
1118 if (!rtsock_msg_match(NULL /*rtsrc*/, AF_INET, NULL /*pbuf*/))
1119 return;
1121 rtsock_rta_init_ifam(&rta, &ifam, ifdev, type, &name);
1123 rtsock_rta_add_v4(&rta, ifdev, num, sin);
1125 if (rtsock_rta_finalize(&ifam, sizeof(ifam), &ifam.ifam_msglen,
1126 &ifam.ifam_addrs, &rta, &pbuf, NULL, 0) > 0)
1127 rtsock_msg_match(NULL /*rtsrc*/, AF_INET, pbuf);
1131 * Add a specific IPv6 address for an interface to the given RTA map.
1133 static void
1134 rtsock_rta_add_v6(struct rtsock_rta * rta, struct ifdev * ifdev,
1135 ifaddr_v6_num_t num, struct sockaddr_in6 sin6[3])
1138 /* Obtain the address data. */
1139 ifaddr_v6_get(ifdev, num, &sin6[0], &sin6[1], &sin6[2]);
1141 /* Add the interface address. */
1142 rtsock_rta_set(rta, RTAX_IFA, &sin6[0], sin6[0].sin6_len);
1144 /* Add the netmask, after compressing it (a no-op at the moment). */
1145 rtsock_compress_netmask((struct sockaddr *)&sin6[1]);
1147 rtsock_rta_set(rta, RTAX_NETMASK, &sin6[1], sin6[1].sin6_len);
1149 /* Possibly add a destination address. */
1150 if (sin6[2].sin6_len != 0)
1151 rtsock_rta_set(rta, RTAX_DST, &sin6[2], sin6[2].sin6_len);
1155 * Send a routing message about a new or deleted IPv6 address for the given
1156 * interface.
1158 void
1159 rtsock_msg_addr_v6(struct ifdev * ifdev, unsigned int type,
1160 ifaddr_v6_num_t num)
1162 struct rtsock_rta rta;
1163 struct ifa_msghdr ifam;
1164 struct sockaddr_dlx name;
1165 struct sockaddr_in6 sin6[3];
1166 struct pbuf *pbuf;
1168 if (!rtsock_msg_match(NULL /*rtsrc*/, AF_INET6, NULL /*pbuf*/))
1169 return;
1171 rtsock_rta_init_ifam(&rta, &ifam, ifdev, type, &name);
1173 rtsock_rta_add_v6(&rta, ifdev, num, sin6);
1175 if (rtsock_rta_finalize(&ifam, sizeof(ifam), &ifam.ifam_msglen,
1176 &ifam.ifam_addrs, &rta, &pbuf, NULL, 0) > 0)
1177 rtsock_msg_match(NULL /*rtsrc*/, AF_INET6, pbuf);
1181 * Send an RTM_MISS routing message about an address for which no route was
1182 * found. The caller must provide the address in the appropriate form and
1183 * perform any per-address rate limiting.
1185 void
1186 rtsock_msg_miss(const struct sockaddr * addr)
1188 struct rt_msghdr rtm;
1189 struct rtsock_rta rta;
1190 struct pbuf *pbuf;
1193 * Unfortunately the destination address has already been generated (as
1194 * 'addr'), which is a big part of the work. Still, skip the rest if
1195 * there is no routing socket to deliver the message to.
1197 if (!rtsock_msg_match(NULL /*rtsrc*/, addr->sa_family, NULL /*pbuf*/))
1198 return;
1200 memset(&rtm, 0, sizeof(rtm));
1201 rtm.rtm_version = RTM_VERSION;
1202 rtm.rtm_type = RTM_MISS;
1204 rtsock_rta_init(&rta);
1206 rtsock_rta_set(&rta, RTAX_DST, addr, addr->sa_len);
1208 if (rtsock_rta_finalize(&rtm, sizeof(rtm), &rtm.rtm_msglen,
1209 &rtm.rtm_addrs, &rta, &pbuf, NULL, 0) > 0)
1210 rtsock_msg_match(NULL /*rtsrc*/, addr->sa_family, pbuf);
1214 * Generate routing socket data for a route, for either routing socket
1215 * broadcasting or a sysctl(7) request. The route is given as 'route'. The
1216 * type of the message (RTM_) is given as 'type'. The resulting routing
1217 * message header is stored in 'rtm' and an address vector is stored in 'rta'.
1218 * The latter may point to addresses generated in 'addr', 'mask', 'gateway',
1219 * and optionally (if not NULL) 'ifp' and 'ifa'. The caller is responsible for
1220 * combining the results into an appropriate routing message.
1222 static void
1223 rtsock_get_route(struct rt_msghdr * rtm, struct rtsock_rta * rta,
1224 union sockaddr_any * addr, union sockaddr_any * mask,
1225 union sockaddr_any * gateway, union sockaddr_any * ifp,
1226 union sockaddr_any * ifa, const struct route_entry * route,
1227 unsigned int type)
1229 struct ifdev *ifdev;
1230 unsigned int flags, use;
1232 route_get(route, addr, mask, gateway, ifp, ifa, &ifdev, &flags, &use);
1234 memset(rtm, 0, sizeof(*rtm));
1235 rtm->rtm_version = RTM_VERSION;
1236 rtm->rtm_type = type;
1237 rtm->rtm_flags = flags;
1238 rtm->rtm_index = ifdev_get_index(ifdev);
1239 rtm->rtm_use = use;
1241 rtsock_rta_init(rta);
1243 rtsock_rta_set(rta, RTAX_DST, addr, addr->sa.sa_len);
1245 if (!(flags & RTF_HOST)) {
1246 rtsock_compress_netmask(&mask->sa);
1248 rtsock_rta_set(rta, RTAX_NETMASK, mask, mask->sa.sa_len);
1251 rtsock_rta_set(rta, RTAX_GATEWAY, gateway, gateway->sa.sa_len);
1253 if (ifp != NULL)
1254 rtsock_rta_set(rta, RTAX_IFP, ifp, ifp->sa.sa_len);
1256 if (ifa != NULL)
1257 rtsock_rta_set(rta, RTAX_IFA, ifa, ifa->sa.sa_len);
1261 * Send a routing message about a route, with the given type which may be one
1262 * of RTM_ADD, RTM_CHANGE, RTM_DELETE, RTM_LOCK, and RTM_GET. The routing
1263 * socket request information 'rtr', if not NULL, provides additional
1264 * information about the routing socket that was the source of the request (if
1265 * any), various fields that should be echoed, and (for RTM_GET) whether to
1266 * add interface information to the output.
1268 void
1269 rtsock_msg_route(const struct route_entry * route, unsigned int type,
1270 const struct rtsock_request * rtr)
1272 union sockaddr_any addr, mask, gateway, ifp, ifa;
1273 struct rt_msghdr rtm;
1274 struct rtsock_rta rta;
1275 struct rtsock *rtsrc;
1276 struct pbuf *pbuf;
1277 int family, getif;
1279 rtsrc = (rtr != NULL) ? rtr->rtr_src : NULL;
1280 family = (route_is_ipv6(route)) ? AF_INET6 : AF_INET;
1282 if (!rtsock_msg_match(rtsrc, family, NULL /*pbuf*/))
1283 return;
1285 getif = (rtr != NULL && rtr->rtr_getif);
1287 rtsock_get_route(&rtm, &rta, &addr, &mask, &gateway,
1288 (getif) ? &ifp : NULL, (getif) ? &ifa : NULL, route, type);
1290 if (rtr != NULL) {
1291 rtm.rtm_flags |= RTF_DONE;
1292 rtm.rtm_pid = rtr->rtr_pid;
1293 rtm.rtm_seq = rtr->rtr_seq;
1296 if (rtsock_rta_finalize(&rtm, sizeof(rtm), &rtm.rtm_msglen,
1297 &rtm.rtm_addrs, &rta, &pbuf, NULL, 0) > 0)
1298 rtsock_msg_match(rtsrc, family, pbuf);
1302 * Generate sysctl(7) output or length for the given routing table entry
1303 * 'route', provided that the route passes the flags filter 'filter'. The
1304 * address length 'addr_len' is used to compute a cheap length estimate. On
1305 * success, return the byte size of the output. If the route was not a match
1306 * for the filter, return zero. On failure, return a negative error code.
1308 static ssize_t
1309 rtsock_info_rtable_entry(const struct route_entry * route, unsigned int filter,
1310 socklen_t addr_len, struct rmib_oldp * oldp, size_t off)
1312 union sockaddr_any addr, mask, gateway;
1313 struct rt_msghdr rtm;
1314 struct rtsock_rta rta;
1315 unsigned int flags;
1316 ssize_t len;
1318 flags = route_get_flags(route);
1320 /* Apparently, matching any of the flags (if given) is sufficient. */
1321 if (filter != 0 && (filter & flags) != 0)
1322 return 0;
1324 /* Size (over)estimation shortcut. */
1325 if (oldp == NULL) {
1326 len = sizeof(rtm) + RT_ROUNDUP(addr_len) +
1327 RT_ROUNDUP(sizeof(gateway));
1329 if (!(flags & RTF_HOST))
1330 len += RT_ROUNDUP(addr_len);
1332 return len;
1335 rtsock_get_route(&rtm, &rta, &addr, &mask, &gateway, NULL /*ifp*/,
1336 NULL /*ifa*/, route, RTM_GET);
1338 return rtsock_rta_finalize(&rtm, sizeof(rtm), &rtm.rtm_msglen,
1339 &rtm.rtm_addrs, &rta, NULL /*pbuf*/, oldp, off);
1343 * Obtain routing table entries.
1345 static ssize_t
1346 rtsock_info_rtable(struct rmib_oldp * oldp, int family, int filter)
1348 struct route_entry *route;
1349 ssize_t r, off;
1351 off = 0;
1353 if (family == AF_UNSPEC || family == AF_INET) {
1354 for (route = NULL; (route = route_enum_v4(route)) != NULL; ) {
1355 if ((r = rtsock_info_rtable_entry(route,
1356 (unsigned int)filter, sizeof(struct sockaddr_in),
1357 oldp, off)) < 0)
1358 return r;
1359 off += r;
1363 if (family == AF_UNSPEC || family == AF_INET6) {
1364 for (route = NULL; (route = route_enum_v6(route)) != NULL; ) {
1365 if ((r = rtsock_info_rtable_entry(route,
1366 (unsigned int)filter, sizeof(struct sockaddr_in6),
1367 oldp, off)) < 0)
1368 return r;
1369 off += r;
1373 /* TODO: should we add slack here? */
1374 return off;
1378 * Generate routing socket data for an ARP table entry, for either routing
1379 * socket broadcasting or a sysctl(7) request. The ARP table entry number is
1380 * given as 'num'. The type of the message (RTM_) is given as 'type'. The
1381 * resulting routing message header is stored in 'rtm' and an address vector is
1382 * stored in 'rta'. The latter may point to addresses generated in 'addr' and
1383 * 'gateway'. The caller is responsible for combining the results into an
1384 * appropriate routing message.
1386 static void
1387 rtsock_get_arp(struct rt_msghdr * rtm, struct rtsock_rta * rta,
1388 struct sockaddr_in * addr, struct sockaddr_dlx * gateway,
1389 lldata_arp_num_t num, unsigned int type)
1391 struct ifdev *ifdev;
1392 unsigned int flags;
1394 lldata_arp_get(num, addr, gateway, &ifdev, &flags);
1396 memset(rtm, 0, sizeof(*rtm));
1397 rtm->rtm_version = RTM_VERSION;
1398 rtm->rtm_type = type;
1399 rtm->rtm_flags = flags;
1400 rtm->rtm_index = ifdev_get_index(ifdev);
1402 /* TODO: obtaining and reporting the proper expiry time, if any. */
1403 if (!(flags & RTF_STATIC))
1404 rtm->rtm_rmx.rmx_expire = (time_t)-1;
1406 rtsock_rta_init(rta);
1408 rtsock_rta_set(rta, RTAX_DST, addr, addr->sin_len);
1410 rtsock_rta_set(rta, RTAX_GATEWAY, gateway, gateway->sdlx_len);
1414 * Send a routing message about an ARP table entry, with the given type which
1415 * may be one of RTM_ADD, RTM_CHANGE, RTM_DELETE, RTM_LOCK, and RTM_GET. The
1416 * routing socket request information 'rtr', if not NULL, provides additional
1417 * information about the routing socket that was the source of the request (if
1418 * any) and various fields that should be echoed.
1420 void
1421 rtsock_msg_arp(lldata_arp_num_t num, unsigned int type,
1422 const struct rtsock_request * rtr)
1424 struct sockaddr_in addr;
1425 struct sockaddr_dlx gateway;
1426 struct rt_msghdr rtm;
1427 struct rtsock_rta rta;
1428 struct pbuf *pbuf;
1430 assert(rtr != NULL);
1433 * We do not maintain the link-local tables ourselves, and thus, we do
1434 * not have a complete view of modifications to them. In order not to
1435 * confuse userland with inconsistent updates (e.g., deletion of
1436 * previously unreported entries), send these routing messages to the
1437 * source of the routing request only.
1439 if (!rtsock_msg_one(rtr->rtr_src, AF_INET, NULL /*pbuf*/))
1440 return;
1442 rtsock_get_arp(&rtm, &rta, &addr, &gateway, num, type);
1444 if (rtr != NULL) {
1445 rtm.rtm_flags |= RTF_DONE;
1446 rtm.rtm_pid = rtr->rtr_pid;
1447 rtm.rtm_seq = rtr->rtr_seq;
1450 if (rtsock_rta_finalize(&rtm, sizeof(rtm), &rtm.rtm_msglen,
1451 &rtm.rtm_addrs, &rta, &pbuf, NULL, 0) > 0)
1452 rtsock_msg_one(rtr->rtr_src, AF_INET, pbuf);
1456 * Obtain ARP table entries.
1458 static ssize_t
1459 rtsock_info_lltable_arp(struct rmib_oldp * oldp)
1461 struct sockaddr_in addr;
1462 struct sockaddr_dlx gateway;
1463 struct rt_msghdr rtm;
1464 struct rtsock_rta rta;
1465 lldata_arp_num_t num;
1466 ssize_t r, off;
1468 off = 0;
1470 for (num = 0; lldata_arp_enum(&num); num++) {
1471 /* Size (over)estimation shortcut. */
1472 if (oldp == NULL) {
1473 off += sizeof(struct rt_msghdr) +
1474 RT_ROUNDUP(sizeof(addr)) +
1475 RT_ROUNDUP(sizeof(gateway));
1477 continue;
1480 rtsock_get_arp(&rtm, &rta, &addr, &gateway, num, RTM_GET);
1482 if ((r = rtsock_rta_finalize(&rtm, sizeof(rtm),
1483 &rtm.rtm_msglen, &rtm.rtm_addrs, &rta, NULL /*pbuf*/, oldp,
1484 off)) < 0)
1485 return r;
1486 off += r;
1489 /* TODO: should we add slack here? */
1490 return off;
1494 * Generate routing socket data for an NDP table entry, for either routing
1495 * socket broadcasting or a sysctl(7) request. The NDP table entry number is
1496 * given as 'num'. The type of the message (RTM_) is given as 'type'. The
1497 * resulting routing message header is stored in 'rtm' and an address vector is
1498 * stored in 'rta'. The latter may point to addresses generated in 'addr' and
1499 * 'gateway'. The caller is responsible for combining the results into an
1500 * appropriate routing message.
1502 static void
1503 rtsock_get_ndp(struct rt_msghdr * rtm, struct rtsock_rta * rta,
1504 struct sockaddr_in6 * addr, struct sockaddr_dlx * gateway,
1505 lldata_ndp_num_t num, unsigned int type)
1507 struct ifdev *ifdev;
1508 unsigned int flags;
1510 lldata_ndp_get(num, addr, gateway, &ifdev, &flags);
1512 memset(rtm, 0, sizeof(*rtm));
1513 rtm->rtm_version = RTM_VERSION;
1514 rtm->rtm_type = type;
1515 rtm->rtm_flags = flags;
1516 rtm->rtm_index = ifdev_get_index(ifdev);
1518 rtsock_rta_init(rta);
1520 rtsock_rta_set(rta, RTAX_DST, addr, addr->sin6_len);
1522 rtsock_rta_set(rta, RTAX_GATEWAY, gateway, gateway->sdlx_len);
1526 * Send a routing message about an NDP table entry, with the given type which
1527 * may be one of RTM_ADD, RTM_CHANGE, RTM_DELETE, RTM_LOCK, and RTM_GET. The
1528 * routing socket request information 'rtr', if not NULL, provides additional
1529 * information about the routing socket that was the source of the request (if
1530 * any) and various fields that should be echoed.
1532 void
1533 rtsock_msg_ndp(lldata_ndp_num_t num, unsigned int type,
1534 const struct rtsock_request * rtr)
1536 struct sockaddr_in6 addr;
1537 struct sockaddr_dlx gateway;
1538 struct rt_msghdr rtm;
1539 struct rtsock_rta rta;
1540 struct pbuf *pbuf;
1542 assert(rtr != NULL);
1545 * We do not maintain the link-local tables ourselves, and thus, we do
1546 * not have a complete view of modifications to them. In order not to
1547 * confuse userland with inconsistent updates (e.g., deletion of
1548 * previously unreported entries), send these routing messages to the
1549 * source of the routing request only.
1551 if (!rtsock_msg_one(rtr->rtr_src, AF_INET6, NULL /*pbuf*/))
1552 return;
1554 rtsock_get_ndp(&rtm, &rta, &addr, &gateway, num, type);
1556 if (rtr != NULL) {
1557 rtm.rtm_flags |= RTF_DONE;
1558 rtm.rtm_pid = rtr->rtr_pid;
1559 rtm.rtm_seq = rtr->rtr_seq;
1562 if (rtsock_rta_finalize(&rtm, sizeof(rtm), &rtm.rtm_msglen,
1563 &rtm.rtm_addrs, &rta, &pbuf, NULL, 0) > 0)
1564 rtsock_msg_one(rtr->rtr_src, AF_INET6, pbuf);
1568 * Obtain NDP table entries.
1570 static ssize_t
1571 rtsock_info_lltable_ndp(struct rmib_oldp * oldp)
1573 struct rt_msghdr rtm;
1574 struct rtsock_rta rta;
1575 struct sockaddr_in6 addr;
1576 struct sockaddr_dlx gateway;
1577 lldata_ndp_num_t num;
1578 ssize_t r, off;
1580 off = 0;
1582 for (num = 0; lldata_ndp_enum(&num); num++) {
1583 /* Size (over)estimation shortcut. */
1584 if (oldp == NULL) {
1585 off += sizeof(struct rt_msghdr) +
1586 RT_ROUNDUP(sizeof(addr)) +
1587 RT_ROUNDUP(sizeof(gateway));
1589 continue;
1592 rtsock_get_ndp(&rtm, &rta, &addr, &gateway, num, RTM_GET);
1594 if ((r = rtsock_rta_finalize(&rtm, sizeof(rtm),
1595 &rtm.rtm_msglen, &rtm.rtm_addrs, &rta, NULL /*pbuf*/, oldp,
1596 off)) < 0)
1597 return r;
1598 off += r;
1601 /* TODO: should we add slack here? */
1602 return off;
1606 * Obtain link-layer (ARP, NDP) table entries.
1608 static ssize_t
1609 rtsock_info_lltable(struct rmib_oldp * oldp, int family)
1612 switch (family) {
1613 case AF_INET:
1614 return rtsock_info_lltable_arp(oldp);
1616 case AF_INET6:
1617 return rtsock_info_lltable_ndp(oldp);
1619 default:
1620 return 0;
1625 * Obtain link-layer address information for one specific interface.
1627 static ssize_t
1628 rtsock_info_if_dl(struct ifdev * ifdev, struct ifa_msghdr * ifam,
1629 struct rmib_oldp * oldp, ssize_t off)
1631 struct rtsock_rta rta;
1632 struct sockaddr_dlx sdlx;
1633 ifaddr_dl_num_t num;
1634 ssize_t r, len;
1636 len = 0;
1638 for (num = 0; ifaddr_dl_enum(ifdev, &num); num++) {
1639 if (oldp == NULL) {
1640 len += sizeof(*ifam) + RT_ROUNDUP(sizeof(sdlx));
1642 continue;
1645 rtsock_rta_init(&rta);
1647 rtsock_rta_add_dl(&rta, ifdev, num, &sdlx);
1649 if ((r = rtsock_rta_finalize(ifam, sizeof(*ifam),
1650 &ifam->ifam_msglen, &ifam->ifam_addrs, &rta, NULL /*pbuf*/,
1651 oldp, off + len)) < 0)
1652 return r;
1653 len += r;
1656 return len;
1660 * Obtain IPv4 address information for one specific interface.
1662 static ssize_t
1663 rtsock_info_if_v4(struct ifdev * ifdev, struct ifa_msghdr * ifam,
1664 struct rmib_oldp * oldp, ssize_t off)
1666 struct sockaddr_in sin[4];
1667 struct rtsock_rta rta;
1668 ifaddr_v4_num_t num;
1669 ssize_t r, len;
1671 len = 0;
1674 * Mostly for future compatibility, we support multiple IPv4 interface
1675 * addresses here. Every interface has an interface address and a
1676 * netmask. In addition, an interface may have either a broadcast or a
1677 * destination address.
1679 for (num = 0; ifaddr_v4_enum(ifdev, &num); num++) {
1680 /* Size (over)estimation shortcut. */
1681 if (oldp == NULL) {
1682 len += sizeof(*ifam) + RT_ROUNDUP(sizeof(sin[0])) * 3;
1684 continue;
1687 rtsock_rta_init(&rta);
1689 rtsock_rta_add_v4(&rta, ifdev, num, sin);
1691 if ((r = rtsock_rta_finalize(ifam, sizeof(*ifam),
1692 &ifam->ifam_msglen, &ifam->ifam_addrs, &rta, NULL /*pbuf*/,
1693 oldp, off + len)) < 0)
1694 return r;
1695 len += r;
1698 return len;
1702 * Obtain IPv6 address information for one specific interface.
1704 static ssize_t
1705 rtsock_info_if_v6(struct ifdev * ifdev, struct ifa_msghdr * ifam,
1706 struct rmib_oldp * oldp, ssize_t off)
1708 struct sockaddr_in6 sin6[3];
1709 struct rtsock_rta rta;
1710 ifaddr_v6_num_t num;
1711 ssize_t r, len;
1713 len = 0;
1715 /* As with IPv4, except that IPv6 has no broadcast addresses. */
1716 for (num = 0; ifaddr_v6_enum(ifdev, &num); num++) {
1717 /* Size (over)estimation shortcut. */
1718 if (oldp == NULL) {
1719 len += sizeof(*ifam) + RT_ROUNDUP(sizeof(sin6[0])) * 3;
1721 continue;
1724 rtsock_rta_init(&rta);
1726 rtsock_rta_add_v6(&rta, ifdev, num, sin6);
1728 if ((r = rtsock_rta_finalize(ifam, sizeof(*ifam),
1729 &ifam->ifam_msglen, &ifam->ifam_addrs, &rta, NULL /*pbuf*/,
1730 oldp, off + len)) < 0)
1731 return r;
1732 len += r;
1735 return len;
1739 * Obtain information for one specific interface.
1741 static ssize_t
1742 rtsock_info_if(struct ifdev * ifdev, struct rmib_oldp * oldp, ssize_t off,
1743 int family)
1745 struct rtsock_rta rta;
1746 struct sockaddr_dlx sdlx;
1747 struct if_msghdr ifm;
1748 struct ifa_msghdr ifam;
1749 unsigned int ifflags;
1750 ssize_t r, len, sdlxsize;
1752 len = 0;
1754 ifflags = ifdev_get_ifflags(ifdev);
1756 /* Create an interface information entry. */
1757 rtsock_rta_init(&rta);
1759 if (oldp != NULL) {
1760 memset(&ifm, 0, sizeof(ifm));
1761 ifm.ifm_version = RTM_VERSION;
1762 ifm.ifm_type = RTM_IFINFO;
1763 ifm.ifm_flags = ifflags;
1764 ifm.ifm_index = ifdev_get_index(ifdev);
1765 memcpy(&ifm.ifm_data, ifdev_get_ifdata(ifdev),
1766 sizeof(ifm.ifm_data));
1770 * Generate a datalink socket address structure. TODO: see if it is
1771 * worth obtaining just the length for the (oldp == NULL) case here.
1773 memset(&sdlx, 0, sizeof(sdlx));
1775 ifaddr_dl_get(ifdev, 0, &sdlx);
1777 sdlxsize = RT_ROUNDUP(sdlx.sdlx_len);
1779 rtsock_rta_set(&rta, RTAX_IFP, &sdlx, sdlxsize);
1781 if ((r = rtsock_rta_finalize(&ifm, sizeof(ifm), &ifm.ifm_msglen,
1782 &ifm.ifm_addrs, &rta, NULL /*pbuf*/, oldp, off + len)) < 0)
1783 return r;
1784 len += r;
1786 /* Generate a header for all addresses once. */
1787 if (oldp != NULL) {
1788 memset(&ifam, 0, sizeof(ifam));
1789 ifam.ifam_version = RTM_VERSION;
1790 ifam.ifam_type = RTM_NEWADDR;
1791 ifam.ifam_flags = 0;
1792 ifam.ifam_index = ifdev_get_index(ifdev);
1793 ifam.ifam_metric = ifdev_get_metric(ifdev);
1796 /* If requested and applicable, add any datalink addresses. */
1797 if (family == AF_UNSPEC || family == AF_LINK) {
1798 if ((r = rtsock_info_if_dl(ifdev, &ifam, oldp, off + len)) < 0)
1799 return r;
1800 len += r;
1803 /* If requested and applicable, add any IPv4 addresses. */
1804 if (family == AF_UNSPEC || family == AF_INET) {
1805 if ((r = rtsock_info_if_v4(ifdev, &ifam, oldp, off + len)) < 0)
1806 return r;
1807 len += r;
1810 /* If requested and applicable, add any IPv6 addresses. */
1811 if (family == AF_UNSPEC || family == AF_INET6) {
1812 if ((r = rtsock_info_if_v6(ifdev, &ifam, oldp, off + len)) < 0)
1813 return r;
1814 len += r;
1817 return len;
/*
 * Obtain interface information for sysctl(7).  If 'ifindex' is nonzero, the
 * result covers just the interface with that index, and is empty if there is
 * no such interface.  Otherwise, the result covers all interfaces.  The
 * 'family' value selects which addresses are included for each interface.
 * Return the byte size of the output, or a negative error code.
 */
static ssize_t
rtsock_info_iflist(struct rmib_oldp * oldp, int family, uint32_t ifindex)
{
	struct ifdev *dev;
	ssize_t r, total;

	/* A specific interface index limits the result to that interface. */
	if (ifindex != 0) {
		dev = ifdev_get_by_index(ifindex);
		if (dev == NULL)
			return 0;

		return rtsock_info_if(dev, oldp, 0, family);
	}

	/* No index was given, so go through the list of all interfaces. */
	total = 0;

	dev = ifdev_enum(NULL);
	while (dev != NULL) {
		/*
		 * Avoid generating results that are never copied out: once
		 * the offset is out of range, only sizes are computed.
		 */
		if (oldp != NULL && !rmib_inrange(oldp, total))
			oldp = NULL;

		r = rtsock_info_if(dev, oldp, total, family);
		if (r < 0)
			return r;

		total += r;

		dev = ifdev_enum(dev);
	}

	/* TODO: should we add slack here? */
	return total;
}
1861 * Obtain routing table, ARP cache, and interface information through
1862 * sysctl(7). Return the (produced, or if oldp is NULL, estimated) byte size
1863 * of the output on success, or a negative error code on failure.
1865 static ssize_t
1866 rtsock_info(struct rmib_call * call, struct rmib_node * node __unused,
1867 struct rmib_oldp * oldp, struct rmib_newp * newp __unused)
1869 int family, filter;
1871 if (call->call_namelen != 3)
1872 return EINVAL;
1874 family = call->call_name[0];
1875 filter = call->call_name[2];
1877 switch (call->call_name[1]) {
1878 case NET_RT_FLAGS:
1880 * Preliminary support for changes as of NetBSD 8, where by
1881 * default, the use of this subcall implies an ARP/NDP-only
1882 * request.
1884 if (filter == 0)
1885 filter |= RTF_LLDATA;
1887 if (filter & RTF_LLDATA) {
1888 if (family == AF_UNSPEC)
1889 return EINVAL;
1892 * Split off ARP/NDP handling from the normal routing
1893 * table listing, as done since NetBSD 8. We generate
1894 * the ARP/NDP listing from here, and keep those
1895 * entries out of the routing table dump below. Since
1896 * the filter is of a match-any type, and we have just
1897 * matched a flag, no further filtering is needed here.
1899 return rtsock_info_lltable(oldp, family);
1902 /* FALLTHROUGH */
1903 case NET_RT_DUMP:
1904 return rtsock_info_rtable(oldp, family, filter);
1906 case NET_RT_IFLIST:
1907 return rtsock_info_iflist(oldp, family, filter);
1909 default:
1910 return EINVAL;