/* LWIP service - rawsock.c - RAW sockets */
/*
 * For IPv6 sockets, this module attempts to implement a part of RFC 3542, but
 * currently not more than what is supported by lwIP and/or what is expected by
 * a handful of standard utilities (dhcpcd, ping6, traceroute6..).
 *
 * For general understanding, be aware that IPv4 raw sockets always receive
 * packets including the IP header, and may be used to send packets including
 * the IP header if IP_HDRINCL is set, while IPv6 raw sockets always send and
 * receive actual payloads only, using ancillary (control) data to set and
 * retrieve per-packet IP header fields.
 *
 * For packet headers we follow general BSD semantics.  For example, some IPv4
 * header fields are swapped both when sending and when receiving.  Also, like
 * on NetBSD, IPPROTO_RAW is not a special value in any way.
 */
23 #include "lwip/inet_chksum.h"
25 #include <net/route.h>
26 #include <netinet/icmp6.h>
27 #include <netinet/ip.h>
28 #include <netinet/in_pcb.h>
/*
 * The number of RAW sockets.  Inherited from the lwIP configuration, so that
 * the socket array has one slot per configured lwIP RAW PCB.
 */
#define NR_RAWSOCK	MEMP_NUM_RAW_PCB
/*
 * Outgoing packets are not getting buffered, so the send buffer size simply
 * determines the maximum size for sent packets.  The send buffer maximum is
 * therefore limited to the maximum size of a single packet (64K-1 bytes),
 * which is already enforced by lwIP's 16-bit length parameter to pbuf_alloc().
 *
 * The actual transmission may enforce a lower limit, though.  The full packet
 * size must not exceed the same 64K-1 limit, and that includes any headers
 * that still have to be prepended to the given packet.  The size of those
 * headers depends on the socket type (IPv4/IPv6) and the IP_HDRINCL setting.
 *
 * The default is equal to the maximum here, because if a (by definition,
 * privileged) application wishes to send large raw packets, it probably has a
 * good reason, and we do not want to get in its way.
 */
/* Upper bound on the size of one raw packet (64K-1 bytes). */
#define RAW_MAX_PAYLOAD	(UINT16_MAX)

/* Bounds and defaults for the per-socket send and receive buffer sizes. */
#define RAW_SNDBUF_MIN	1		/* minimum RAW send buffer size */
#define RAW_SNDBUF_DEF	RAW_MAX_PAYLOAD	/* default RAW send buffer size */
#define RAW_SNDBUF_MAX	RAW_MAX_PAYLOAD	/* maximum RAW send buffer size */
#define RAW_RCVBUF_MIN	MEMPOOL_BUFSIZE	/* minimum RAW receive buffer size */
#define RAW_RCVBUF_DEF	32768		/* default RAW receive buffer size */
#define RAW_RCVBUF_MAX	65536		/* maximum RAW receive buffer size */
57 static struct rawsock
{
58 struct pktsock raw_pktsock
; /* packet socket object */
59 struct raw_pcb
*raw_pcb
; /* lwIP RAW control block */
60 TAILQ_ENTRY(rawsock
) raw_next
; /* next in active/free list */
61 struct icmp6_filter raw_icmp6filter
; /* ICMPv6 type filter */
62 } raw_array
[NR_RAWSOCK
];
64 static TAILQ_HEAD(, rawsock
) raw_freelist
; /* list of free RAW sockets */
65 static TAILQ_HEAD(, rawsock
) raw_activelist
; /* list, in-use RAW sockets */
67 static const struct sockevent_ops rawsock_ops
;
/*
 * Accessors that take a 'struct rawsock *'.  The first four reach through the
 * embedded packet socket object; the last two test state bits kept in the
 * flags of the socket's lwIP RAW PCB.
 */
#define rawsock_get_sock(raw)	(ipsock_get_sock(rawsock_get_ipsock(raw)))
#define rawsock_get_ipsock(raw)	(pktsock_get_ipsock(&(raw)->raw_pktsock))
#define rawsock_is_ipv6(raw)	(ipsock_is_ipv6(rawsock_get_ipsock(raw)))
#define rawsock_is_v6only(raw)	(ipsock_is_v6only(rawsock_get_ipsock(raw)))

/*
 * These two yield the masked flag bit itself rather than 0/1, so use !! where
 * a strict boolean value is required.
 */
#define rawsock_is_conn(raw)	\
	(raw_flags((raw)->raw_pcb) & RAW_FLAGS_CONNECTED)
#define rawsock_is_hdrincl(raw)	\
	(raw_flags((raw)->raw_pcb) & RAW_FLAGS_HDRINCL)
/*
 * Forward declaration of the "pcblist" sysctl handler, needed because the RMIB
 * table that follows refers to it before its definition.
 */
static ssize_t rawsock_pcblist(struct rmib_call * call,
	struct rmib_node * node, struct rmib_oldp * oldp,
	struct rmib_newp * newp);
81 /* The CTL_NET {PF_INET,PF_INET6} IPPROTO_RAW subtree. */
82 /* All dynamically numbered; the sendspace/recvspace entries are ours. */
83 static struct rmib_node net_inet_raw_table
[] = {
84 RMIB_INT(RMIB_RO
, RAW_SNDBUF_DEF
, "sendspace",
85 "Default RAW send buffer size"),
86 RMIB_INT(RMIB_RO
, RAW_RCVBUF_DEF
, "recvspace",
87 "Default RAW receive buffer size"),
88 RMIB_FUNC(RMIB_RO
| CTLTYPE_NODE
, 0, rawsock_pcblist
, "pcblist",
89 "RAW IP protocol control block list"),
92 static struct rmib_node net_inet_raw_node
=
93 RMIB_NODE(RMIB_RO
, net_inet_raw_table
, "raw", "RAW IPv4 settings");
94 static struct rmib_node net_inet6_raw6_node
=
95 RMIB_NODE(RMIB_RO
, net_inet_raw_table
, "raw6", "RAW IPv6 settings");
98 * Initialize the raw sockets module.
105 /* Initialize the list of free RAW sockets. */
106 TAILQ_INIT(&raw_freelist
);
108 for (slot
= 0; slot
< __arraycount(raw_array
); slot
++)
109 TAILQ_INSERT_TAIL(&raw_freelist
, &raw_array
[slot
], raw_next
);
111 /* Initialize the list of active RAW sockets. */
112 TAILQ_INIT(&raw_activelist
);
114 /* Register the net.inet.raw and net.inet6.raw6 RMIB subtrees. */
115 mibtree_register_inet(PF_INET
, IPPROTO_RAW
, &net_inet_raw_node
);
116 mibtree_register_inet(PF_INET6
, IPPROTO_RAW
, &net_inet6_raw6_node
);
120 * Check whether the given arrived IPv6 packet is fit to be received on the
124 rawsock_check_v6(struct rawsock
* raw
, struct pbuf
* pbuf
)
128 assert(rawsock_is_ipv6(raw
));
131 * For ICMPv6 packets, test against the configured type filter.
133 if (raw
->raw_pcb
->protocol
== IPPROTO_ICMPV6
) {
134 if (pbuf
->len
< offsetof(struct icmp6_hdr
, icmp6_dataun
))
137 memcpy(&type
, &((struct icmp6_hdr
*)pbuf
->payload
)->icmp6_type
,
140 if (!ICMP6_FILTER_WILLPASS((int)type
, &raw
->raw_icmp6filter
))
145 * For ICMPv6 packets, or if IPV6_CHECKSUM is enabled, we have to
146 * verify the checksum of the packet before passing it to the user.
147 * This is costly, but it needs to be done and lwIP is not doing it for
148 * us (as of writing, anyway), even though it maintains the offset..
150 if (raw
->raw_pcb
->chksum_reqd
&&
151 (pbuf
->tot_len
< raw
->raw_pcb
->chksum_offset
+ sizeof(uint16_t) ||
152 ip6_chksum_pseudo(pbuf
, raw
->raw_pcb
->protocol
, pbuf
->tot_len
,
153 ip6_current_src_addr(), ip6_current_dest_addr()) != 0)) {
157 /* No reason to filter out this packet. */
162 * Adjust the given arrived IPv4 packet by changing the length and offset
163 * fields to host-byte order, as is done by the BSDs. This effectively mirrors
164 * the swapping part of the preparation done on IPv4 packets being sent if the
165 * IP_HDRINCL socket option is enabled.
168 rawsock_adjust_v4(struct pbuf
* pbuf
)
170 struct ip_hdr
*iphdr
;
172 if (pbuf
->len
< sizeof(struct ip_hdr
))
175 iphdr
= (struct ip_hdr
*)pbuf
->payload
;
178 * W. Richard Stevens also mentions ip_id, but at least on NetBSD that
179 * field seems to be swapped neither when sending nor when receiving..
181 IPH_LEN(iphdr
) = htons(IPH_LEN(iphdr
));
182 IPH_OFFSET(iphdr
) = htons(IPH_OFFSET(iphdr
));
186 * A packet has arrived on a raw socket. Since the same packet may have to be
187 * delivered to multiple raw sockets, we always return 0 (= not consumed) from
188 * this function. As such, we must make a copy of the given packet if we want
189 * to keep it, and never free it.
192 rawsock_input(void * arg
, struct raw_pcb
* pcb __unused
, struct pbuf
* psrc
,
193 const ip_addr_t
* srcaddr
)
195 struct rawsock
*raw
= (struct rawsock
*)arg
;
199 assert(raw
->raw_pcb
== pcb
);
202 * If adding this packet would cause the receive buffer to go beyond
203 * the current limit, drop the new packet. This is just an estimation,
204 * because the copy we are about to make may not take the exact same
205 * amount of memory, due to the fact that 1) the pbuf we're given has
206 * an unknown set of headers in front of it, and 2) we need to store
207 * extra information in our copy. The return value of this call, if
208 * not -1, is the number of bytes we need to reserve to store that
211 if ((hdrlen
= pktsock_test_input(&raw
->raw_pktsock
, psrc
)) < 0)
215 * Raw IPv6 sockets receive only the actual packet data, whereas raw
216 * IPv4 sockets receive the IP header as well.
218 if (ip_current_is_v6()) {
219 off
= ip_current_header_tot_len();
221 util_pbuf_header(psrc
, -off
);
223 if (!rawsock_check_v6(raw
, psrc
)) {
224 util_pbuf_header(psrc
, off
);
230 * For IPv6 sockets, drop the packet if it was sent as an IPv4
231 * packet and checksumming is enabled (this includes ICMPv6).
232 * Otherwise, the packet would bypass the above checks that we
233 * perform on IPv6 packets. Applications that want to use a
234 * dual-stack protocol with checksumming will have to do the
235 * checksum verification part themselves. Presumably the two
236 * different pseudoheaders would result in different checksums
237 * anyhow, so it would be useless to try to support that.
239 * Beyond that, for IPv4 packets on IPv6 sockets, hide the IPv4
242 if (rawsock_is_ipv6(raw
)) {
243 if (raw
->raw_pcb
->chksum_reqd
)
248 util_pbuf_header(psrc
, -off
);
254 * We need to make a copy of the incoming packet. If we eat the one
255 * given to us, this will 1) stop any other raw sockets from getting
256 * the same packet, 2) allow a single raw socket to discard all TCP/UDP
257 * traffic, and 3) present us with a problem on how to store ancillary
258 * data. Raw sockets are not that performance critical so the extra
259 * copy -even when not always necessary- is not that big of a deal.
261 if ((pbuf
= pchain_alloc(PBUF_RAW
, hdrlen
+ psrc
->tot_len
)) == NULL
) {
263 util_pbuf_header(psrc
, off
);
268 util_pbuf_header(pbuf
, -hdrlen
);
270 if (pbuf_copy(pbuf
, psrc
) != ERR_OK
)
271 panic("unexpected pbuf copy failure");
273 pbuf
->flags
|= psrc
->flags
& (PBUF_FLAG_LLMCAST
| PBUF_FLAG_LLBCAST
);
276 util_pbuf_header(psrc
, off
);
278 if (!rawsock_is_ipv6(raw
))
279 rawsock_adjust_v4(pbuf
);
281 pktsock_input(&raw
->raw_pktsock
, pbuf
, srcaddr
, 0);
287 * Create a raw socket.
290 rawsock_socket(int domain
, int protocol
, struct sock
** sockp
,
291 const struct sockevent_ops
** ops
)
297 if (protocol
< 0 || protocol
> UINT8_MAX
)
298 return EPROTONOSUPPORT
;
300 if (TAILQ_EMPTY(&raw_freelist
))
303 raw
= TAILQ_FIRST(&raw_freelist
);
306 * Initialize the structure. Do not memset it to zero, as it is still
307 * part of the linked free list. Initialization may still fail.
310 ip_type
= pktsock_socket(&raw
->raw_pktsock
, domain
, RAW_SNDBUF_DEF
,
311 RAW_RCVBUF_DEF
, sockp
);
313 /* We should have enough PCBs so this call should not fail.. */
314 if ((raw
->raw_pcb
= raw_new_ip_type(ip_type
, protocol
)) == NULL
)
316 raw_recv(raw
->raw_pcb
, rawsock_input
, (void *)raw
);
318 /* By default, the multicast TTL is 1 and looping is enabled. */
319 raw_set_multicast_ttl(raw
->raw_pcb
, 1);
321 flags
= raw_flags(raw
->raw_pcb
);
322 raw_setflags(raw
->raw_pcb
, flags
| RAW_FLAGS_MULTICAST_LOOP
);
325 * For ICMPv6, checksum generation and verification is mandatory and
326 * type filtering of incoming packets is supported (RFC 3542). For all
327 * other IPv6 protocols, checksumming may be turned on by the user.
329 if (rawsock_is_ipv6(raw
) && protocol
== IPPROTO_ICMPV6
) {
330 raw
->raw_pcb
->chksum_reqd
= 1;
331 raw
->raw_pcb
->chksum_offset
=
332 offsetof(struct icmp6_hdr
, icmp6_cksum
);
334 ICMP6_FILTER_SETPASSALL(&raw
->raw_icmp6filter
);
336 raw
->raw_pcb
->chksum_reqd
= 0;
338 TAILQ_REMOVE(&raw_freelist
, raw
, raw_next
);
340 TAILQ_INSERT_TAIL(&raw_activelist
, raw
, raw_next
);
343 return SOCKID_RAW
| (sockid_t
)(raw
- raw_array
);
347 * Bind a raw socket to a local address.
350 rawsock_bind(struct sock
* sock
, const struct sockaddr
* addr
,
351 socklen_t addr_len
, endpoint_t user_endpt
)
353 struct rawsock
*raw
= (struct rawsock
*)sock
;
359 * Raw sockets may be rebound even if that is not too useful. However,
360 * we do not allow (re)binding when the socket is connected, so as to
361 * eliminate any problems with source and destination type mismatches:
362 * such mismatches are detected at connect time, and rebinding would
363 * avoid those, possibly triggering lwIP asserts as a result.
365 if (rawsock_is_conn(raw
))
368 if ((r
= ipsock_get_src_addr(rawsock_get_ipsock(raw
), addr
, addr_len
,
369 user_endpt
, &raw
->raw_pcb
->local_ip
, 0 /*local_port*/,
370 TRUE
/*allow_mcast*/, &ipaddr
, NULL
/*portp*/)) != OK
)
373 err
= raw_bind(raw
->raw_pcb
, &ipaddr
);
375 return util_convert_err(err
);
379 * Connect a raw socket to a remote address.
382 rawsock_connect(struct sock
* sock
, const struct sockaddr
* addr
,
383 socklen_t addr_len
, endpoint_t user_endpt __unused
)
385 struct rawsock
*raw
= (struct rawsock
*)sock
;
386 const ip_addr_t
*src_addr
;
389 uint32_t ifindex
, ifindex2
;
394 * One may "unconnect" socket by providing an address with family
397 if (addr_is_unspec(addr
, addr_len
)) {
398 raw_disconnect(raw
->raw_pcb
);
403 if ((r
= ipsock_get_dst_addr(rawsock_get_ipsock(raw
), addr
, addr_len
,
404 &raw
->raw_pcb
->local_ip
, &dst_addr
, NULL
/*dst_port*/)) != OK
)
408 * Bind explicitly to a source address if the PCB is not bound to one
409 * yet. This is expected in the BSD socket API, but lwIP does not do
412 if (ip_addr_isany(&raw
->raw_pcb
->local_ip
)) {
413 /* Help the multicast case a bit, if possible. */
415 if (ip_addr_ismulticast(&dst_addr
)) {
416 ifindex
= pktsock_get_ifindex(&raw
->raw_pktsock
);
417 ifindex2
= raw_get_multicast_netif_index(raw
->raw_pcb
);
422 ifdev
= ifdev_get_by_index(ifindex
);
429 src_addr
= ifaddr_select(&dst_addr
, ifdev
, NULL
/*ifdevp*/);
431 if (src_addr
== NULL
)
434 err
= raw_bind(raw
->raw_pcb
, src_addr
);
437 return util_convert_err(err
);
441 * Connecting a raw socket serves two main purposes: 1) the socket uses
442 * the address as destination when sending, and 2) the socket receives
443 * packets from only the connected address.
445 err
= raw_connect(raw
->raw_pcb
, &dst_addr
);
448 return util_convert_err(err
);
454 * Perform preliminary checks on a send request.
457 rawsock_pre_send(struct sock
* sock
, size_t len
, socklen_t ctl_len __unused
,
458 const struct sockaddr
* addr
, socklen_t addr_len __unused
,
459 endpoint_t user_endpt __unused
, int flags
)
461 struct rawsock
*raw
= (struct rawsock
*)sock
;
463 if ((flags
& ~MSG_DONTROUTE
) != 0)
466 if (!rawsock_is_conn(raw
) && addr
== NULL
)
470 * This is only one part of the length check. The rest is done from
471 * rawsock_send(), once we have more information.
473 if (len
> ipsock_get_sndbuf(rawsock_get_ipsock(raw
)))
480 * Swap IP-level options between the RAW PCB and the packet options structure,
481 * for all options that have their flag set in the packet options structure.
482 * This function is called twice when sending a packet. The result is that the
483 * flagged options are overridden for only the packet being sent.
486 rawsock_swap_opt(struct rawsock
* raw
, struct pktopt
* pkto
)
488 uint8_t tos
, ttl
, mcast_ttl
;
490 if (pkto
->pkto_flags
& PKTOF_TOS
) {
491 tos
= raw
->raw_pcb
->tos
;
492 raw
->raw_pcb
->tos
= pkto
->pkto_tos
;
493 pkto
->pkto_tos
= tos
;
496 if (pkto
->pkto_flags
& PKTOF_TTL
) {
497 ttl
= raw
->raw_pcb
->ttl
;
498 mcast_ttl
= raw_get_multicast_ttl(raw
->raw_pcb
);
499 raw
->raw_pcb
->ttl
= pkto
->pkto_ttl
;
500 raw_set_multicast_ttl(raw
->raw_pcb
, pkto
->pkto_ttl
);
501 pkto
->pkto_ttl
= ttl
;
502 pkto
->pkto_mcast_ttl
= mcast_ttl
;
507 * We are about to send the given packet that already includes an IPv4 header,
508 * because the IP_HDRINCL option is enabled on a raw IPv4 socket. Prepare the
509 * IPv4 header for sending, by modifying a few fields in it, as expected by
513 rawsock_prepare_hdrincl(struct rawsock
* raw
, struct pbuf
* pbuf
,
514 const ip_addr_t
* src_addr
)
516 struct ip_hdr
*iphdr
;
520 * lwIP obtains the destination address from the IP packet header in
521 * this case, so make sure the packet has a full-sized header.
523 if (pbuf
->len
< sizeof(struct ip_hdr
))
526 iphdr
= (struct ip_hdr
*)pbuf
->payload
;
529 * Fill in the source address if it is not set, and do the byte
530 * swapping and checksum computation common for the BSDs, without which
531 * ping(8) and traceroute(8) do not work properly. We consider this a
532 * convenience feature, so malformed packets are simply sent as is.
533 * TODO: deal with type punning..
535 hlen
= (size_t)IPH_HL(iphdr
) << 2;
537 if (pbuf
->len
>= hlen
) {
538 /* Fill in the source address if it is blank. */
539 if (iphdr
->src
.addr
== PP_HTONL(INADDR_ANY
)) {
540 assert(IP_IS_V4(src_addr
));
542 iphdr
->src
.addr
= ip_addr_get_ip4_u32(src_addr
);
545 IPH_LEN(iphdr
) = htons(IPH_LEN(iphdr
));
546 IPH_OFFSET(iphdr
) = htons(IPH_OFFSET(iphdr
));
547 IPH_CHKSUM(iphdr
) = 0;
549 IPH_CHKSUM(iphdr
) = inet_chksum(iphdr
, hlen
);
556 * Send a packet on a raw socket.
559 rawsock_send(struct sock
* sock
, const struct sockdriver_data
* data
,
560 size_t len
, size_t * off
, const struct sockdriver_data
* ctl __unused
,
561 socklen_t ctl_len __unused
, socklen_t
* ctl_off __unused
,
562 const struct sockaddr
* addr
, socklen_t addr_len
,
563 endpoint_t user_endpt __unused
, int flags
, size_t min __unused
)
565 struct rawsock
*raw
= (struct rawsock
*)sock
;
566 struct pktopt pktopt
;
570 const ip_addr_t
*dst_addrp
, *src_addrp
;
571 ip_addr_t src_addr
, dst_addr
; /* for storage only; not always used! */
577 /* Copy in and parse any packet options. */
578 pktopt
.pkto_flags
= 0;
580 if ((r
= pktsock_get_ctl(&raw
->raw_pktsock
, ctl
, ctl_len
,
585 * For a more in-depth explanation of what is going on here, see the
586 * udpsock module, which has largely the same code but with more
587 * elaborate comments.
591 * Start by checking whether the source address and/or the outgoing
592 * interface are overridden using sticky and/or ancillary options.
594 if ((r
= pktsock_get_pktinfo(&raw
->raw_pktsock
, &pktopt
, &ifdev
,
598 if (ifdev
!= NULL
&& !ip_addr_isany(&src_addr
)) {
599 /* This is guaranteed to be a proper local unicast address. */
600 src_addrp
= &src_addr
;
602 src_addrp
= &raw
->raw_pcb
->local_ip
;
605 * If the socket is bound to a multicast address, use the
606 * unspecified ('any') address as source address instead. A
607 * real source address will then be selected further below.
609 if (ip_addr_ismulticast(src_addrp
))
610 src_addrp
= IP46_ADDR_ANY(IP_GET_TYPE(src_addrp
));
614 * Determine the destination address to use. If the socket is
615 * connected, always ignore any address provided in the send call.
617 if (!rawsock_is_conn(raw
)) {
618 assert(addr
!= NULL
); /* already checked in pre_send */
620 if ((r
= ipsock_get_dst_addr(rawsock_get_ipsock(raw
), addr
,
621 addr_len
, src_addrp
, &dst_addr
, NULL
/*dst_port*/)) != OK
)
624 dst_addrp
= &dst_addr
;
626 dst_addrp
= &raw
->raw_pcb
->remote_ip
;
629 * If the destination is a multicast address, select the outgoing
630 * interface based on the multicast interface index, if one is set.
631 * This must however *not* override an interface index already
632 * specified using IPV6_PKTINFO, as per RFC 3542 Sec. 6.7.
634 if (ifdev
== NULL
&& ip_addr_ismulticast(dst_addrp
)) {
635 ifindex
= raw_get_multicast_netif_index(raw
->raw_pcb
);
637 if (ifindex
!= NETIF_NO_INDEX
)
638 ifdev
= ifdev_get_by_index(ifindex
); /* (may fail) */
642 * If an interface has been determined already now, the send operation
643 * will bypass routing. In that case, we must perform our own checks
644 * on address zone violations, because those will not be made anywhere
645 * else. Subsequent steps below will never introduce violations.
647 if (ifdev
!= NULL
&& IP_IS_V6(dst_addrp
)) {
648 if (ifaddr_is_zone_mismatch(ip_2_ip6(dst_addrp
), ifdev
))
651 if (IP_IS_V6(src_addrp
) &&
652 ifaddr_is_zone_mismatch(ip_2_ip6(src_addrp
), ifdev
))
657 * If we do not yet have an interface at this point, perform a route
658 * lookup to determine the outgoing interface, unless MSG_DONTROUTE is
662 if (!(flags
& MSG_DONTROUTE
)) {
664 * ip_route() should never be called with an
665 * IPADDR_TYPE_ANY type address. This is a lwIP-
666 * internal requirement; while we override both routing
667 * functions, we do not deviate from it.
669 if (IP_IS_ANY_TYPE_VAL(*src_addrp
))
671 IP46_ADDR_ANY(IP_GET_TYPE(dst_addrp
));
673 /* Perform the route lookup. */
674 if ((netif
= ip_route(src_addrp
, dst_addrp
)) == NULL
)
677 ifdev
= netif_get_ifdev(netif
);
679 if ((ifdev
= ifaddr_map_by_subnet(dst_addrp
)) == NULL
)
685 * At this point we have an outgoing interface. If we do not have a
686 * source address yet, pick one now. As a sidenote, if the destination
687 * address is scoped but has no zone, we could also fill in the zone
688 * now. We let lwIP handle that instead, though.
690 assert(ifdev
!= NULL
);
692 if (ip_addr_isany(src_addrp
)) {
693 src_addrp
= ifaddr_select(dst_addrp
, ifdev
, NULL
/*ifdevp*/);
695 if (src_addrp
== NULL
)
700 * Now that we know the full conditions of what we are about to send,
701 * check whether the packet size leaves enough room for lwIP to prepend
702 * headers. If so, allocate a chain of pbufs for the packet.
704 assert(len
<= RAW_MAX_PAYLOAD
);
706 if (rawsock_is_hdrincl(raw
))
708 else if (IP_IS_V6(dst_addrp
))
713 if (hdrlen
+ len
> RAW_MAX_PAYLOAD
)
716 if ((pbuf
= pchain_alloc(PBUF_IP
, len
)) == NULL
)
719 /* Copy in the packet data. */
720 if ((r
= pktsock_get_data(&raw
->raw_pktsock
, data
, len
, pbuf
)) != OK
) {
727 * If the user has turned on IPV6_CHECKSUM, ensure that the packet is
728 * not only large enough to have the checksum stored at the configured
729 * place, but also that the checksum fits within the first pbuf: if we
730 * do not test this here, an assert will trigger in lwIP later. Also
731 * zero out the checksum field first, because lwIP does not do that.
733 if (raw
->raw_pcb
->chksum_reqd
) {
734 if (pbuf
->len
< raw
->raw_pcb
->chksum_offset
+
741 memset((char *)pbuf
->payload
+ raw
->raw_pcb
->chksum_offset
, 0,
746 * For sockets where an IPv4 header is already included in the packet,
747 * we need to alter a few header fields to be compatible with BSD.
749 if (rawsock_is_hdrincl(raw
) &&
750 (r
= rawsock_prepare_hdrincl(raw
, pbuf
, src_addrp
)) != OK
) {
756 /* Set broadcast/multicast flags for accounting purposes. */
757 if (ip_addr_ismulticast(dst_addrp
))
758 pbuf
->flags
|= PBUF_FLAG_LLMCAST
;
759 else if (ip_addr_isbroadcast(dst_addrp
, ifdev_get_netif(ifdev
)))
760 pbuf
->flags
|= PBUF_FLAG_LLBCAST
;
762 /* Send the packet. */
763 rawsock_swap_opt(raw
, &pktopt
);
765 assert(!ip_addr_isany(src_addrp
));
766 assert(!ip_addr_ismulticast(src_addrp
));
768 err
= raw_sendto_if_src(raw
->raw_pcb
, pbuf
, dst_addrp
,
769 ifdev_get_netif(ifdev
), src_addrp
);
771 rawsock_swap_opt(raw
, &pktopt
);
773 /* Free the pbuf again. */
777 * On success, make sure to return the size of the sent packet as well.
778 * As an aside: ctl_off need not be updated, as it is not returned.
780 if ((r
= util_convert_err(err
)) == OK
)
786 * Update the set of flag-type socket options on a raw socket.
789 rawsock_setsockmask(struct sock
* sock
, unsigned int mask
)
791 struct rawsock
*raw
= (struct rawsock
*)sock
;
794 * FIXME: raw sockets are not supposed to have a broadcast check, so
795 * perhaps just remove this and instead always set SOF_BROADCAST?
797 if (mask
& SO_BROADCAST
)
798 ip_set_option(raw
->raw_pcb
, SOF_BROADCAST
);
800 ip_reset_option(raw
->raw_pcb
, SOF_BROADCAST
);
804 * Prepare a helper structure for IP-level option processing.
807 rawsock_get_ipopts(struct rawsock
* raw
, struct ipopts
* ipopts
)
810 ipopts
->local_ip
= &raw
->raw_pcb
->local_ip
;
811 ipopts
->remote_ip
= &raw
->raw_pcb
->remote_ip
;
812 ipopts
->tos
= &raw
->raw_pcb
->tos
;
813 ipopts
->ttl
= &raw
->raw_pcb
->ttl
;
814 ipopts
->sndmin
= RAW_SNDBUF_MIN
;
815 ipopts
->sndmax
= RAW_SNDBUF_MAX
;
816 ipopts
->rcvmin
= RAW_RCVBUF_MIN
;
817 ipopts
->rcvmax
= RAW_RCVBUF_MAX
;
821 * Set socket options on a raw socket.
824 rawsock_setsockopt(struct sock
* sock
, int level
, int name
,
825 const struct sockdriver_data
* data
, socklen_t len
)
827 struct rawsock
*raw
= (struct rawsock
*)sock
;
828 struct ipopts ipopts
;
829 struct icmp6_filter filter
;
831 struct in_addr in_addr
;
839 * Unfortunately, we have to duplicate most of the multicast options
840 * rather than sharing them with udpsock at the pktsock level. The
841 * reason is that each of the PCBs have their own multicast abstraction
842 * functions and so we cannot merge the rest. Same for getsockopt.
847 if (rawsock_is_ipv6(raw
))
852 if ((r
= sockdriver_copyin_opt(data
, &val
, sizeof(val
),
857 raw_setflags(raw
->raw_pcb
,
858 raw_flags(raw
->raw_pcb
) |
861 raw_setflags(raw
->raw_pcb
,
862 raw_flags(raw
->raw_pcb
) &
868 case IP_MULTICAST_IF
:
869 pktsock_set_mcaware(&raw
->raw_pktsock
);
871 if ((r
= sockdriver_copyin_opt(data
, &in_addr
,
872 sizeof(in_addr
), len
)) != OK
)
875 ip_addr_set_ip4_u32(&ipaddr
, in_addr
.s_addr
);
877 if ((ifdev
= ifaddr_map_by_addr(&ipaddr
)) == NULL
)
878 return EADDRNOTAVAIL
;
880 raw_set_multicast_netif_index(raw
->raw_pcb
,
881 ifdev_get_index(ifdev
));
885 case IP_MULTICAST_LOOP
:
886 pktsock_set_mcaware(&raw
->raw_pktsock
);
888 if ((r
= sockdriver_copyin_opt(data
, &byte
,
889 sizeof(byte
), len
)) != OK
)
892 flags
= raw_flags(raw
->raw_pcb
);
895 flags
|= RAW_FLAGS_MULTICAST_LOOP
;
897 flags
&= ~RAW_FLAGS_MULTICAST_LOOP
;
899 raw_setflags(raw
->raw_pcb
, flags
);
903 case IP_MULTICAST_TTL
:
904 pktsock_set_mcaware(&raw
->raw_pktsock
);
906 if ((r
= sockdriver_copyin_opt(data
, &byte
,
907 sizeof(byte
), len
)) != OK
)
910 raw_set_multicast_ttl(raw
->raw_pcb
, byte
);
918 if (!rawsock_is_ipv6(raw
))
923 /* ICMPv6 checksums are always computed. */
924 if (raw
->raw_pcb
->protocol
== IPPROTO_ICMPV6
)
927 if ((r
= sockdriver_copyin_opt(data
, &val
, sizeof(val
),
932 raw
->raw_pcb
->chksum_reqd
= 0;
935 } else if (val
>= 0 && !(val
& 1)) {
936 raw
->raw_pcb
->chksum_reqd
= 1;
937 raw
->raw_pcb
->chksum_offset
= val
;
943 case IPV6_MULTICAST_IF
:
944 pktsock_set_mcaware(&raw
->raw_pktsock
);
946 if ((r
= sockdriver_copyin_opt(data
, &val
, sizeof(val
),
951 ifindex
= (uint32_t)val
;
953 ifdev
= ifdev_get_by_index(ifindex
);
958 ifindex
= NETIF_NO_INDEX
;
960 raw_set_multicast_netif_index(raw
->raw_pcb
, ifindex
);
964 case IPV6_MULTICAST_LOOP
:
965 pktsock_set_mcaware(&raw
->raw_pktsock
);
967 if ((r
= sockdriver_copyin_opt(data
, &val
, sizeof(val
),
971 if (val
< 0 || val
> 1)
974 flags
= raw_flags(raw
->raw_pcb
);
977 flags
|= RAW_FLAGS_MULTICAST_LOOP
;
979 flags
&= ~RAW_FLAGS_MULTICAST_LOOP
;
982 * lwIP's IPv6 functionality does not actually check
983 * this flag at all yet. We set it in the hope that
984 * one day this will magically start working.
986 raw_setflags(raw
->raw_pcb
, flags
);
990 case IPV6_MULTICAST_HOPS
:
991 pktsock_set_mcaware(&raw
->raw_pktsock
);
993 if ((r
= sockdriver_copyin_opt(data
, &val
, sizeof(val
),
997 if (val
< -1 || val
> UINT8_MAX
)
1003 raw_set_multicast_ttl(raw
->raw_pcb
, val
);
1010 case IPPROTO_ICMPV6
:
1011 if (!rawsock_is_ipv6(raw
) ||
1012 raw
->raw_pcb
->protocol
!= IPPROTO_ICMPV6
)
1017 /* Who comes up with these stupid exceptions? */
1019 ICMP6_FILTER_SETPASSALL(&raw
->raw_icmp6filter
);
1024 if ((r
= sockdriver_copyin_opt(data
, &filter
,
1025 sizeof(filter
), len
)) != OK
)
1029 * As always, never copy in the data into the actual
1030 * destination, as any copy may run into a copy fault
1031 * halfway through, potentially leaving the destination
1032 * in a half-updated and thus corrupted state.
1034 memcpy(&raw
->raw_icmp6filter
, &filter
, sizeof(filter
));
1040 rawsock_get_ipopts(raw
, &ipopts
);
1042 return pktsock_setsockopt(&raw
->raw_pktsock
, level
, name
, data
, len
,
1047 * Retrieve socket options on a raw socket.
1050 rawsock_getsockopt(struct sock
* sock
, int level
, int name
,
1051 const struct sockdriver_data
* data
, socklen_t
* len
)
1053 struct rawsock
*raw
= (struct rawsock
*)sock
;
1054 struct ipopts ipopts
;
1055 const ip4_addr_t
*ip4addr
;
1056 struct in_addr in_addr
;
1057 struct ifdev
*ifdev
;
1065 if (rawsock_is_ipv6(raw
))
1070 val
= !!rawsock_is_hdrincl(raw
);
1072 return sockdriver_copyout_opt(data
, &val
, sizeof(val
),
1075 case IP_MULTICAST_IF
:
1076 ifindex
= raw_get_multicast_netif_index(raw
->raw_pcb
);
1079 * Map back from the interface index to the IPv4
1080 * address assigned to the corresponding interface.
1081 * Should this not work out, return the 'any' address.
1083 if (ifindex
!= NETIF_NO_INDEX
&&
1084 (ifdev
= ifdev_get_by_index(ifindex
)) != NULL
) {
1086 netif_ip4_addr(ifdev_get_netif(ifdev
));
1088 in_addr
.s_addr
= ip4_addr_get_u32(ip4addr
);
1090 in_addr
.s_addr
= PP_HTONL(INADDR_ANY
);
1092 return sockdriver_copyout_opt(data
, &in_addr
,
1093 sizeof(in_addr
), len
);
1095 case IP_MULTICAST_LOOP
:
1096 flags
= raw_flags(raw
->raw_pcb
);
1098 byte
= !!(flags
& RAW_FLAGS_MULTICAST_LOOP
);
1100 return sockdriver_copyout_opt(data
, &byte
,
1103 case IP_MULTICAST_TTL
:
1104 byte
= raw_get_multicast_ttl(raw
->raw_pcb
);
1106 return sockdriver_copyout_opt(data
, &byte
,
1113 if (!rawsock_is_ipv6(raw
))
1118 if (raw
->raw_pcb
->chksum_reqd
)
1119 val
= raw
->raw_pcb
->chksum_offset
;
1123 return sockdriver_copyout_opt(data
, &val
, sizeof(val
),
1126 case IPV6_MULTICAST_IF
:
1127 ifindex
= raw_get_multicast_netif_index(raw
->raw_pcb
);
1131 return sockdriver_copyout_opt(data
, &val
, sizeof(val
),
1134 case IPV6_MULTICAST_LOOP
:
1135 flags
= raw_flags(raw
->raw_pcb
);
1137 val
= !!(flags
& RAW_FLAGS_MULTICAST_LOOP
);
1139 return sockdriver_copyout_opt(data
, &val
, sizeof(val
),
1142 case IPV6_MULTICAST_HOPS
:
1143 val
= raw_get_multicast_ttl(raw
->raw_pcb
);
1145 return sockdriver_copyout_opt(data
, &val
, sizeof(val
),
1151 case IPPROTO_ICMPV6
:
1152 if (!rawsock_is_ipv6(raw
) ||
1153 raw
->raw_pcb
->protocol
!= IPPROTO_ICMPV6
)
1158 return sockdriver_copyout_opt(data
,
1159 &raw
->raw_icmp6filter
,
1160 sizeof(raw
->raw_icmp6filter
), len
);
1166 rawsock_get_ipopts(raw
, &ipopts
);
1168 return pktsock_getsockopt(&raw
->raw_pktsock
, level
, name
, data
, len
,
1173 * Retrieve the local socket address of a raw socket.
1176 rawsock_getsockname(struct sock
* sock
, struct sockaddr
* addr
,
1177 socklen_t
* addr_len
)
1179 struct rawsock
*raw
= (struct rawsock
*)sock
;
1181 ipsock_put_addr(rawsock_get_ipsock(raw
), addr
, addr_len
,
1182 &raw
->raw_pcb
->local_ip
, 0 /*port*/);
1188 * Retrieve the remote socket address of a raw socket.
1191 rawsock_getpeername(struct sock
* sock
, struct sockaddr
* addr
,
1192 socklen_t
* addr_len
)
1194 struct rawsock
*raw
= (struct rawsock
*)sock
;
1196 if (!rawsock_is_conn(raw
))
1199 ipsock_put_addr(rawsock_get_ipsock(raw
), addr
, addr_len
,
1200 &raw
->raw_pcb
->remote_ip
, 0 /*port*/);
1206 * Shut down a raw socket for reading and/or writing.
1209 rawsock_shutdown(struct sock
* sock
, unsigned int mask
)
1211 struct rawsock
*raw
= (struct rawsock
*)sock
;
1213 if (mask
& SFL_SHUT_RD
)
1214 raw_recv(raw
->raw_pcb
, NULL
, NULL
);
1216 pktsock_shutdown(&raw
->raw_pktsock
, mask
);
1222 * Close a raw socket.
1225 rawsock_close(struct sock
* sock
, int force __unused
)
1227 struct rawsock
*raw
= (struct rawsock
*)sock
;
1229 raw_recv(raw
->raw_pcb
, NULL
, NULL
);
1231 raw_remove(raw
->raw_pcb
);
1232 raw
->raw_pcb
= NULL
;
1234 pktsock_close(&raw
->raw_pktsock
);
1240 * Free up a closed raw socket.
1243 rawsock_free(struct sock
* sock
)
1245 struct rawsock
*raw
= (struct rawsock
*)sock
;
1247 assert(raw
->raw_pcb
== NULL
);
1249 TAILQ_REMOVE(&raw_activelist
, raw
, raw_next
);
1251 TAILQ_INSERT_HEAD(&raw_freelist
, raw
, raw_next
);
1255 * Fill the given kinfo_pcb sysctl(7) structure with information about the RAW
1256 * PCB identified by the given pointer.
1259 rawsock_get_info(struct kinfo_pcb
* ki
, const void * ptr
)
1261 const struct raw_pcb
*pcb
= (const struct raw_pcb
*)ptr
;
1262 struct rawsock
*raw
;
1264 /* We iterate our own list so we can't find "strange" PCBs. */
1265 raw
= (struct rawsock
*)pcb
->recv_arg
;
1266 assert(raw
>= raw_array
&&
1267 raw
< &raw_array
[__arraycount(raw_array
)]);
1269 ki
->ki_type
= SOCK_RAW
;
1270 ki
->ki_protocol
= pcb
->protocol
;
1272 ipsock_get_info(ki
, &pcb
->local_ip
, 0 /*local_port*/,
1273 &raw
->raw_pcb
->remote_ip
, 0 /*remote_port*/);
1275 /* TODO: change this so that sockstat(1) may work one day. */
1276 ki
->ki_sockaddr
= (uint64_t)(uintptr_t)rawsock_get_sock(raw
);
1278 ki
->ki_rcvq
= pktsock_get_recvlen(&raw
->raw_pktsock
);
1280 if (rawsock_is_hdrincl(raw
))
1281 ki
->ki_pflags
|= INP_HDRINCL
;
1285 * Given either NULL or a previously returned RAW PCB pointer, return the first
1286 * or next RAW PCB pointer, or NULL if there are no more. lwIP does not expose
1287 * 'raw_pcbs', but other modules in this service may also use RAW PCBs (which
1288 * should then stay hidden), so we iterate through our own list instead.
1291 rawsock_enum(const void * last
)
1293 const struct raw_pcb
*pcb
;
1294 struct rawsock
*raw
;
1297 pcb
= (const struct raw_pcb
*)last
;
1299 raw
= (struct rawsock
*)pcb
->recv_arg
;
1300 assert(raw
>= raw_array
&&
1301 raw
< &raw_array
[__arraycount(raw_array
)]);
1303 raw
= TAILQ_NEXT(raw
, raw_next
);
1305 raw
= TAILQ_FIRST(&raw_activelist
);
1308 return raw
->raw_pcb
;
1314 * Obtain the list of RAW protocol control blocks, for sysctl(7).
1317 rawsock_pcblist(struct rmib_call
* call
, struct rmib_node
* node
,
1318 struct rmib_oldp
* oldp
, struct rmib_newp
* newp __unused
)
1321 return util_pcblist(call
, oldp
, rawsock_enum
, rawsock_get_info
);
1324 static const struct sockevent_ops rawsock_ops
= {
1325 .sop_bind
= rawsock_bind
,
1326 .sop_connect
= rawsock_connect
,
1327 .sop_pre_send
= rawsock_pre_send
,
1328 .sop_send
= rawsock_send
,
1329 .sop_pre_recv
= pktsock_pre_recv
,
1330 .sop_recv
= pktsock_recv
,
1331 .sop_test_recv
= pktsock_test_recv
,
1332 .sop_ioctl
= ifconf_ioctl
,
1333 .sop_setsockmask
= rawsock_setsockmask
,
1334 .sop_setsockopt
= rawsock_setsockopt
,
1335 .sop_getsockopt
= rawsock_getsockopt
,
1336 .sop_getsockname
= rawsock_getsockname
,
1337 .sop_getpeername
= rawsock_getpeername
,
1338 .sop_shutdown
= rawsock_shutdown
,
1339 .sop_close
= rawsock_close
,
1340 .sop_free
= rawsock_free