1 /* LWIP service - ipsock.c - shared IP-level socket code */
6 #define ip6_hdr __netbsd_ip6_hdr /* conflicting definitions */
8 #include <netinet/ip.h>
9 #include <netinet/in_pcb.h>
10 #include <netinet6/in6_pcb.h>
13 /* The following are sysctl(7) settings. */
14 int lwip_ip4_forward
= 0; /* We patch lwIP to check these.. */
15 int lwip_ip6_forward
= 0; /* ..two settings at run time. */
16 static int ipsock_v6only
= 1;
18 /* The CTL_NET PF_INET IPPROTO_IP subtree. */
19 static struct rmib_node net_inet_ip_table
[] = {
20 /* 1*/ [IPCTL_FORWARDING
] = RMIB_INTPTR(RMIB_RW
, &lwip_ip4_forward
,
22 "Enable forwarding of INET diagrams"),
23 /* 3*/ [IPCTL_DEFTTL
] = RMIB_INT(RMIB_RO
, IP_DEFAULT_TTL
, "ttl",
24 "Default TTL for an INET diagram"),
25 /*23*/ [IPCTL_LOOPBACKCKSUM
] = RMIB_FUNC(RMIB_RW
| CTLTYPE_INT
, sizeof(int),
26 loopif_cksum
, "do_loopback_cksum",
27 "Perform IP checksum on loopback"),
30 static struct rmib_node net_inet_ip_node
=
31 RMIB_NODE(RMIB_RO
, net_inet_ip_table
, "ip", "IPv4 related settings");
33 /* The CTL_NET PF_INET6 IPPROTO_IPV6 subtree. */
34 static struct rmib_node net_inet6_ip6_table
[] = {
35 /* 1*/ [IPV6CTL_FORWARDING
] = RMIB_INTPTR(RMIB_RW
, &lwip_ip6_forward
,
37 "Enable forwarding of INET6 diagrams"),
39 * The following functionality is not
40 * implemented in lwIP at this time.
42 /* 2*/ [IPV6CTL_SENDREDIRECTS
] = RMIB_INT(RMIB_RO
, 0, "redirect", "Enable "
43 "sending of ICMPv6 redirect messages"),
44 /* 3*/ [IPV6CTL_DEFHLIM
] = RMIB_INT(RMIB_RO
, IP_DEFAULT_TTL
, "hlim",
45 "Hop limit for an INET6 datagram"),
46 /*12*/ [IPV6CTL_ACCEPT_RTADV
] = RMIB_INTPTR(RMIB_RW
, &ifaddr_accept_rtadv
,
48 "Accept router advertisements"),
49 /*16*/ [IPV6CTL_DAD_COUNT
] = RMIB_INT(RMIB_RO
,
50 LWIP_IPV6_DUP_DETECT_ATTEMPTS
, "dad_count",
51 "Number of Duplicate Address Detection "
53 /*24*/ [IPV6CTL_V6ONLY
] = RMIB_INTPTR(RMIB_RW
, &ipsock_v6only
,
54 "v6only", "Disallow PF_INET6 sockets from "
55 "connecting to PF_INET sockets"),
57 * The following setting is significantly
58 * different from NetBSD, and therefore it has
59 * a somewhat different description as well.
61 /*35*/ [IPV6CTL_AUTO_LINKLOCAL
]= RMIB_INTPTR(RMIB_RW
, &ifaddr_auto_linklocal
,
62 "auto_linklocal", "Enable global support "
63 "for adding IPv6link-local addresses to "
66 * Temporary addresses are managed entirely by
67 * userland. We only maintain the settings.
69 /*+0*/ [IPV6CTL_MAXID
] = RMIB_INT(RMIB_RW
, 0, "use_tempaddr",
70 "Use temporary address"),
71 /*+1*/ [IPV6CTL_MAXID
+ 1] = RMIB_INT(RMIB_RW
, 86400, "temppltime",
72 "Preferred lifetime of a temporary "
74 /*+2*/ [IPV6CTL_MAXID
+ 2] = RMIB_INT(RMIB_RW
, 604800, "tempvltime",
75 "Valid lifetime of a temporary address"),
78 static struct rmib_node net_inet6_ip6_node
=
79 RMIB_NODE(RMIB_RO
, net_inet6_ip6_table
, "ip6", "IPv6 related settings");
82 * Initialize the IP sockets module.
89 * Register the net.inet.ip and net.inet6.ip6 subtrees. Unlike for the
90 * specific protocols (TCP/UDP/RAW), here the IPv4 and IPv6 subtrees
91 * are and must be separate, even though many settings are shared
92 * between the two at the lwIP level. Ultimately we may have to split
93 * the subtrees for the specific protocols, too, though..
95 mibtree_register_inet(AF_INET
, IPPROTO_IP
, &net_inet_ip_node
);
96 mibtree_register_inet(AF_INET6
, IPPROTO_IPV6
, &net_inet6_ip6_node
);
100 * Return the lwIP IP address type (IPADDR_TYPE_) for the given IP socket.
103 ipsock_get_type(struct ipsock
* ip
)
106 if (!(ip
->ip_flags
& IPF_IPV6
))
107 return IPADDR_TYPE_V4
;
108 else if (ip
->ip_flags
& IPF_V6ONLY
)
109 return IPADDR_TYPE_V6
;
111 return IPADDR_TYPE_ANY
;
115 * Create an IP socket, for the given (PF_/AF_) domain and initial send and
116 * receive buffer sizes. Return the lwIP IP address type that should be used
117 * to create the corresponding PCB. Return a pointer to the libsockevent
118 * socket in 'sockp'. This function must not allocate any resources in any
119 * form, as socket creation may still fail later, in which case no destruction
120 * function is called.
123 ipsock_socket(struct ipsock
* ip
, int domain
, size_t sndbuf
, size_t rcvbuf
,
124 struct sock
** sockp
)
127 ip
->ip_flags
= (domain
== AF_INET6
) ? IPF_IPV6
: 0;
129 if (domain
== AF_INET6
&& ipsock_v6only
)
130 ip
->ip_flags
|= IPF_V6ONLY
;
132 ip
->ip_sndbuf
= sndbuf
;
133 ip
->ip_rcvbuf
= rcvbuf
;
135 /* Important: when adding settings here, also change ipsock_clone(). */
137 *sockp
= &ip
->ip_sock
;
139 return ipsock_get_type(ip
);
143 * Clone the given socket 'ip' into the new socket 'newip', using the socket
144 * identifier 'newid'. In particular, tell libsockevent about the clone and
145 * copy over any settings from 'ip' to 'newip' that can be inherited on a
146 * socket. Cloning is used for new TCP connections arriving on listening TCP
147 * sockets. This function must not fail.
150 ipsock_clone(struct ipsock
* ip
, struct ipsock
* newip
, sockid_t newid
)
153 sockevent_clone(&ip
->ip_sock
, &newip
->ip_sock
, newid
);
155 /* Inherit all settings from the original socket. */
156 newip
->ip_flags
= ip
->ip_flags
;
157 newip
->ip_sndbuf
= ip
->ip_sndbuf
;
158 newip
->ip_rcvbuf
= ip
->ip_rcvbuf
;
162 * Create an <any> address for the given socket, taking into account whether
163 * the socket is IPv4, IPv6, or mixed. The generated address, stored in
164 * 'ipaddr', will have the same type as returned from the ipsock_socket() call.
167 ipsock_get_any_addr(struct ipsock
* ip
, ip_addr_t
* ipaddr
)
170 ip_addr_set_any(ipsock_is_ipv6(ip
), ipaddr
);
172 if (ipsock_is_ipv6(ip
) && !ipsock_is_v6only(ip
))
173 IP_SET_TYPE(ipaddr
, IPADDR_TYPE_ANY
);
177 * Verify whether the given (properly scoped) IP address is a valid source
178 * address for the given IP socket. The 'allow_mcast' flag indicates whether
179 * the source address is allowed to be a multicast address. Return OK on
180 * success. If 'ifdevp' is not NULL, it is filled with either the interface
181 * that owns the address, or NULL if the address is (while valid) not
182 * associated with a particular interface. On failure, return a negative error
183 * code. This function must be called, in one way or another, for every source
184 * address used for binding or sending on an IP-layer socket.
187 ipsock_check_src_addr(struct ipsock
* ip
, ip_addr_t
* ipaddr
, int allow_mcast
,
188 struct ifdev
** ifdevp
)
192 uint32_t inaddr
, zone
;
196 * TODO: for now, forbid binding to multicast addresses. Callers that
197 * never allow multicast addresses anyway (e.g., IPV6_PKTINFO) should
198 * do their own check for this; the one here may eventually be removed.
200 is_mcast
= ip_addr_ismulticast(ipaddr
);
202 if (is_mcast
&& !allow_mcast
)
203 return EADDRNOTAVAIL
;
205 if (IP_IS_V6(ipaddr
)) {
207 * The given address must not have a KAME-style embedded zone.
208 * This check is already performed in addr_get_inet(), but we
209 * have to replicate it here because not all source addresses
210 * go through addr_get_inet().
212 ip6addr
= ip_2_ip6(ipaddr
);
214 if (ip6_addr_has_scope(ip6addr
, IP6_UNKNOWN
) &&
215 (ip6addr
->addr
[0] & PP_HTONL(0x0000ffffUL
)))
219 * lwIP does not support IPv4-mapped IPv6 addresses, so these
220 * must be converted to plain IPv4 addresses instead. The IPv4
221 * 'any' address is not supported in this form. In V6ONLY
222 * mode, refuse binding to or sending from IPv4-mapped addresses
225 if (ip6_addr_isipv4mappedipv6(ip6addr
)) {
226 if (ipsock_is_v6only(ip
))
229 inaddr
= ip6addr
->addr
[3];
231 if (inaddr
== PP_HTONL(INADDR_ANY
))
232 return EADDRNOTAVAIL
;
234 ip_addr_set_ip4_u32(ipaddr
, inaddr
);
240 if (!ip_addr_isany(ipaddr
)) {
241 if (IP_IS_V6(ipaddr
) &&
242 ip6_addr_lacks_zone(ip_2_ip6(ipaddr
), IP6_UNKNOWN
))
243 return EADDRNOTAVAIL
;
246 * If the address is a unicast address, it must be assigned to
247 * an interface. Otherwise, if it is a zoned multicast
248 * address, the zone denotes the interface. For global
249 * multicast addresses, we cannot determine an interface.
252 if ((ifdev
= ifaddr_map_by_addr(ipaddr
)) == NULL
)
253 return EADDRNOTAVAIL
;
255 /* Some multicast addresses are not acceptable. */
256 if (!addr_is_valid_multicast(ipaddr
))
259 if (IP_IS_V6(ipaddr
) &&
260 ip6_addr_has_zone(ip_2_ip6(ipaddr
))) {
261 zone
= ip6_addr_zone(ip_2_ip6(ipaddr
));
263 if ((ifdev
= ifdev_get_by_index(zone
)) == NULL
)
276 * Retrieve and validate a source address for use in a socket bind call on
277 * socket 'ip'. The user-provided address is given as 'addr', with length
278 * 'addr_len'. The socket's current local IP address and port are given as
279 * 'local_ip' and 'local_port', respectively; for raw sockets, the given local
280 * port number is always zero. The caller's endpoint is given as 'user_endpt',
281 * used to make sure only root can bind to privileged port numbers. The boolean
282 * 'allow_mcast' flag indicates whether the source address is allowed to be a
283 * multicast address. On success, return OK with the source IP address stored
284 * in 'src_addr' and, if 'src_port' is not NULL, the port number to bind to
285 * stored in 'src_port'. Otherwise, return a negative error code. This function
286 * performs all the tasks necessary before the socket can be bound using a lwIP
290 ipsock_get_src_addr(struct ipsock
* ip
, const struct sockaddr
* addr
,
291 socklen_t addr_len
, endpoint_t user_endpt
, ip_addr_t
* local_ip
,
292 uint16_t local_port
, int allow_mcast
, ip_addr_t
* src_addr
,
299 * If the socket has been bound already, it cannot be bound again.
300 * We check this by checking whether the current local port is non-
301 * zero. This rule does not apply to raw sockets, but raw sockets have
302 * no port numbers anyway, so this conveniently works out. However,
303 * raw sockets may not be rebound after being connected, but that is
304 * checked before we even get here.
309 /* Parse the user-provided address. */
310 if ((r
= addr_get_inet(addr
, addr_len
, ipsock_get_type(ip
), src_addr
,
311 FALSE
/*kame*/, &port
)) != OK
)
314 /* Validate the user-provided address. */
315 if ((r
= ipsock_check_src_addr(ip
, src_addr
, allow_mcast
,
316 NULL
/*ifdevp*/)) != OK
)
320 * If we are interested in port numbers at all (for non-raw sockets,
321 * meaning src_port is not NULL), make sure that only the superuser can
322 * bind to privileged port numbers. For raw sockets, only the
323 * superuser can open a socket anyway, so we need no check here.
325 if (src_port
!= NULL
) {
326 if (port
!= 0 && port
< IPPORT_RESERVED
&&
327 !util_is_root(user_endpt
))
337 * Retrieve and validate a destination address for use in a socket connect or
338 * sendto call. The user-provided address is given as 'addr', with length
339 * 'addr_len'. The socket's current local IP address is given as 'local_addr'.
340 * On success, return OK with the destination IP address stored in 'dst_addr'
341 * and, if 'dst_port' is not NULL, the destination port number stored in
342 * 'dst_port'. Otherwise, return a negative error code. This function must be
343 * called, in one way or another, for every destination address used for
344 * connecting or sending on an IP-layer socket.
347 ipsock_get_dst_addr(struct ipsock
* ip
, const struct sockaddr
* addr
,
348 socklen_t addr_len
, const ip_addr_t
* local_addr
, ip_addr_t
* dst_addr
,
354 /* Parse the user-provided address. */
355 if ((r
= addr_get_inet(addr
, addr_len
, ipsock_get_type(ip
), dst_addr
,
356 FALSE
/*kame*/, &port
)) != OK
)
359 /* Destination addresses are always specific. */
360 if (IP_GET_TYPE(dst_addr
) == IPADDR_TYPE_ANY
)
361 IP_SET_TYPE(dst_addr
, IPADDR_TYPE_V6
);
364 * lwIP does not support IPv4-mapped IPv6 addresses, so these must be
365 * converted to plain IPv4 addresses instead. In V6ONLY mode, refuse
366 * connecting or sending to IPv4-mapped addresses at all.
368 if (IP_IS_V6(dst_addr
) &&
369 ip6_addr_isipv4mappedipv6(ip_2_ip6(dst_addr
))) {
370 if (ipsock_is_v6only(ip
))
373 ip_addr_set_ip4_u32(dst_addr
, ip_2_ip6(dst_addr
)->addr
[3]);
377 * Now make sure that the local and remote addresses are of the same
378 * family. The local address may be of type IPADDR_TYPE_ANY, which is
379 * allowed for both IPv4 and IPv6. Even for connectionless socket
380 * types we must perform this check as part of connect calls (as well
381 * as sendto calls!) because otherwise we will create problems for
382 * sysctl based socket enumeration (i.e., netstat), which uses the
383 * local IP address type to determine the socket family.
385 if (IP_GET_TYPE(local_addr
) != IPADDR_TYPE_ANY
&&
386 IP_IS_V6(local_addr
) != IP_IS_V6(dst_addr
))
390 * TODO: on NetBSD, an 'any' destination address is replaced with a
391 * local interface address.
393 if (ip_addr_isany(dst_addr
))
397 * If the address is a multicast address, the multicast address itself
400 if (ip_addr_ismulticast(dst_addr
) &&
401 !addr_is_valid_multicast(dst_addr
))
405 * TODO: decide whether to add a zone to a scoped IPv6 address that
406 * lacks a zone. For now, we let lwIP handle this, as lwIP itself
407 * will always add the zone at some point. If anything changes there,
408 * this would be the place to set the zone (using a route lookup).
412 * For now, we do not forbid or alter any other particular destination
416 if (dst_port
!= NULL
) {
418 * Disallow connecting/sending to port zero. There is no error
419 * code that applies well to this case, so we copy NetBSD's.
422 return EADDRNOTAVAIL
;
431 * Store the address 'ipaddr' associated with the socket 'ip' (for example, it
432 * may be the local or remote IP address of the socket) as a sockaddr structure
433 * in 'addr'. A port number is provided as 'port' (in host-byte order) if
434 * relevant, and zero is passed in otherwise. This function MUST only be
435 * called from contexts where 'addr' is a buffer provided by libsockevent or
436 * libsockdriver, meaning that it is of size SOCKADDR_MAX. The value pointed
437 * to by 'addr_len' is not expected to be initialized in calls to this function
438 * (and will typically be zero). On return, 'addr_len' is filled with the length
439 * of the address generated in 'addr'. This function never fails.
442 ipsock_put_addr(struct ipsock
* ip
, struct sockaddr
* addr
,
443 socklen_t
* addr_len
, ip_addr_t
* ipaddr
, uint16_t port
)
445 ip_addr_t mappedaddr
;
448 * If the socket is an AF_INET6-type socket, and the given address is
449 * an IPv4-type address, store it as an IPv4-mapped IPv6 address.
451 if (ipsock_is_ipv6(ip
) && IP_IS_V4(ipaddr
)) {
452 addr_make_v4mapped_v6(&mappedaddr
, ip_2_ip4(ipaddr
));
454 ipaddr
= &mappedaddr
;
458 * We have good reasons to keep the sockdriver and sockevent APIs as
459 * they are, namely, defaulting 'addr_len' to zero such that the caller
460 * must provide a non-zero length (only) when returning a valid
461 * address. The consequence here is that we have to know the size of
462 * the provided buffer. For libsockevent callbacks, we are always
463 * guaranteed to get a buffer of at least this size.
465 *addr_len
= SOCKADDR_MAX
;
467 addr_put_inet(addr
, addr_len
, ipaddr
, FALSE
/*kame*/, port
);
471 * Set socket options on an IP socket.
474 ipsock_setsockopt(struct ipsock
* ip
, int level
, int name
,
475 const struct sockdriver_data
* data
, socklen_t len
,
476 struct ipopts
* ipopts
)
485 if ((r
= sockdriver_copyin_opt(data
, &val
, sizeof(val
),
489 if (val
<= 0 || (size_t)val
< ipopts
->sndmin
||
490 (size_t)val
> ipopts
->sndmax
)
498 if ((r
= sockdriver_copyin_opt(data
, &val
, sizeof(val
),
502 if (val
<= 0 || (size_t)val
< ipopts
->rcvmin
||
503 (size_t)val
> ipopts
->rcvmax
)
514 if (ipsock_is_ipv6(ip
))
519 if ((r
= sockdriver_copyin_opt(data
, &val
, sizeof(val
),
523 if (val
< 0 || val
> UINT8_MAX
)
526 *ipopts
->tos
= (uint8_t)val
;
531 if ((r
= sockdriver_copyin_opt(data
, &val
, sizeof(val
),
535 if (val
< 0 || val
> UINT8_MAX
)
538 *ipopts
->ttl
= (uint8_t)val
;
546 if (!ipsock_is_ipv6(ip
))
550 case IPV6_UNICAST_HOPS
:
551 if ((r
= sockdriver_copyin_opt(data
, &val
, sizeof(val
),
555 if (val
< -1 || val
> UINT8_MAX
)
559 val
= IP_DEFAULT_TTL
;
566 if ((r
= sockdriver_copyin_opt(data
, &val
, sizeof(val
),
570 if (val
< -1 || val
> UINT8_MAX
)
581 if ((r
= sockdriver_copyin_opt(data
, &val
, sizeof(val
),
586 * If the socket has been bound to an actual address,
587 * we still allow the option to be changed, but it no
588 * longer has any effect.
590 type
= IP_GET_TYPE(ipopts
->local_ip
);
591 allow
= (type
== IPADDR_TYPE_ANY
||
592 (type
== IPADDR_TYPE_V6
&&
593 ip_addr_isany(ipopts
->local_ip
)));
596 ip
->ip_flags
|= IPF_V6ONLY
;
598 type
= IPADDR_TYPE_V6
;
600 ip
->ip_flags
&= ~IPF_V6ONLY
;
602 type
= IPADDR_TYPE_ANY
;
606 IP_SET_TYPE(ipopts
->local_ip
, type
);
618 * Retrieve socket options on an IP socket.
621 ipsock_getsockopt(struct ipsock
* ip
, int level
, int name
,
622 const struct sockdriver_data
* data
, socklen_t
* len
,
623 struct ipopts
* ipopts
)
633 return sockdriver_copyout_opt(data
, &val
, sizeof(val
),
639 return sockdriver_copyout_opt(data
, &val
, sizeof(val
),
646 if (ipsock_is_ipv6(ip
))
651 val
= (int)*ipopts
->tos
;
653 return sockdriver_copyout_opt(data
, &val
, sizeof(val
),
657 val
= (int)*ipopts
->ttl
;
659 return sockdriver_copyout_opt(data
, &val
, sizeof(val
),
666 if (!ipsock_is_ipv6(ip
))
670 case IPV6_UNICAST_HOPS
:
673 return sockdriver_copyout_opt(data
, &val
, sizeof(val
),
679 return sockdriver_copyout_opt(data
, &val
, sizeof(val
),
683 val
= !!(ip
->ip_flags
& IPF_V6ONLY
);
685 return sockdriver_copyout_opt(data
, &val
, sizeof(val
),
696 * Fill the given kinfo_pcb sysctl(7) structure with IP-level information.
699 ipsock_get_info(struct kinfo_pcb
* ki
, const ip_addr_t
* local_ip
,
700 uint16_t local_port
, const ip_addr_t
* remote_ip
, uint16_t remote_port
)
706 len
= sizeof(ki
->ki_spad
); /* use this for the full size, not ki_src */
708 addr_put_inet(&ki
->ki_src
, &len
, local_ip
, TRUE
/*kame*/, local_port
);
711 * At this point, the local IP address type has already been used to
712 * determine whether this is an IPv4 or IPv6 socket. While not ideal,
713 * that is the best we can do: we cannot use IPv4-mapped IPv6 addresses
714 * in lwIP PCBs, we cannot store the original type in those PCBs, and
715 * we also cannot rely on the PCB having an associated ipsock object
716 * anymore. We also cannot use the ipsock only when present: it could
717 * make a TCP PCB "jump" from IPv6 to IPv4 in the netstat listing when
718 * it goes into TIME_WAIT state, for example.
720 * So, use *only* the type of the local IP address to determine whether
721 * this is an IPv4 or an IPv6 socket. At the same time, do *not* rely
722 * on the remote IP address being IPv4 for a local IPv4 address; it may
723 * be of type IPADDR_TYPE_V6 for an unconnected socket bound to an
724 * IPv4-mapped IPv6 address. Pretty messy, but we're limited by what
725 * lwIP offers here. Since it's just netstat, it need not be perfect.
727 if ((type
= IP_GET_TYPE(local_ip
)) == IPADDR_TYPE_V4
) {
728 if (!ip_addr_isany(local_ip
) || local_port
!= 0)
729 ki
->ki_prstate
= INP_BOUND
;
732 * Make sure the returned socket address types are consistent.
733 * The only case where the remote IP address is not IPv4 here
734 * is when it is not set yet, so there is no need to check
735 * whether it is the 'any' address: it always is.
737 if (IP_GET_TYPE(remote_ip
) != IPADDR_TYPE_V4
) {
738 ip_addr_set_zero_ip4(&ipaddr
);
743 if (!ip_addr_isany(local_ip
) || local_port
!= 0)
744 ki
->ki_prstate
= IN6P_BOUND
;
745 if (type
!= IPADDR_TYPE_ANY
)
746 ki
->ki_pflags
|= IN6P_IPV6_V6ONLY
;
749 len
= sizeof(ki
->ki_dpad
); /* use this for the full size, not ki_dst */
751 addr_put_inet(&ki
->ki_dst
, &len
, remote_ip
, TRUE
/*kame*/,
754 /* Check the type of the *local* IP address here. See above. */
755 if (!ip_addr_isany(remote_ip
) || remote_port
!= 0) {
756 if (type
== IPADDR_TYPE_V4
)
757 ki
->ki_prstate
= INP_CONNECTED
;
759 ki
->ki_prstate
= IN6P_CONNECTED
;