1 /* LWIP service - pktsock.c - packet code shared between UDP and RAW */
8 * This buffer should be much bigger (at least 10KB, according to RFC 3542),
9 * but we do not support the ancillary options that take so much space anyway.
11 #define PKTSOCK_CTLBUF_SIZE 256
13 static char pktsock_ctlbuf
[PKTSOCK_CTLBUF_SIZE
];
16 * Header structures with ancillary data for received packets. The reason that
17 * we do not simply use a generic pkthdr structure with ip_addr_t source and
18 * destination addresses, is that for UDP packets, we put this structure in
19 * place of the received (Ethernet and IP) headers, and such a full structure
20 * (including IPv6-size addresses) would not fit in the header space for IPv4
21 * packets. So instead we use two address structures, one for IPv4 and one for
22 * IPv6, and a generic header structure on top of it, which also identifies
23 * which address structure is underneath. The combination of the address
24 * structure and the header structure must fit in the IP header. The IPv6
25 * packet header is already so close to the limit here that we have to use
26 * packed addresses. For IPv4 we use the regular addresses for simplicity.
29 uint16_t port
; /* source port number (UDP only) */
30 uint8_t dstif
; /* interface that received the pkt */
31 uint8_t addrif
; /* interface that accepted the pkt */
32 uint8_t tos
; /* TOS/TC value from the IP header */
33 uint8_t ttl
; /* TTL/HL value from the IP header */
34 uint8_t flags
; /* packet flags (PKTHF_) */
35 uint8_t _unused
; /* all that is still available.. */
38 #define PKTHF_IPV6 0x01 /* packet has IPv6 header */
39 #define PKTHF_MCAST 0x02 /* packet has multicast destination */
40 #define PKTHF_BCAST 0x04 /* packet has broadcast destination */
53 * Create a packet socket. Relay parameters and return values to and from the
54 * IP module's socket creation function. This function must not allocate any
55 * resources in any form, as socket creation may still fail later, in which
56 * case no destruction function is called.
59 pktsock_socket(struct pktsock
* pkt
, int domain
, size_t sndbuf
, size_t rcvbuf
,
63 pkt
->pkt_rcvhead
= NULL
;
64 pkt
->pkt_rcvtailp
= &pkt
->pkt_rcvhead
;
67 mcast_reset(&pkt
->pkt_mcast
);
69 memset(&pkt
->pkt_srcaddr
, 0, sizeof(pkt
->pkt_srcaddr
));
73 * Any PKTF_ type flags should be initialized on the socket only after
74 * the following call, as this call will clear the flags field. For
75 * now, no PKTF_ flags need to be set by default, though.
77 return ipsock_socket(&pkt
->pkt_ipsock
, domain
, sndbuf
, rcvbuf
, sockp
);
81 * Return TRUE if the given packet can and should be received on the given
82 * socket, or FALSE if there is a reason not to receive the packet.
85 pktsock_may_recv(struct pktsock
* pkt
, struct pbuf
* pbuf
)
89 * By policy, multicast packets should not be received on sockets of
90 * which the owning application is not multicast aware.
92 if (ip_addr_ismulticast(ip_current_dest_addr()) &&
93 !(ipsock_get_flag(&pkt
->pkt_ipsock
, PKTF_MCAWARE
)))
97 * Due to fragment reassembly, we might end up with packets that take
98 * up more buffer space than their byte size, even after rounding up
99 * the latter. The user probably does not want packets to get dropped
100 * for that reason, e.g. when they set a 64K limit and the packet ends
101 * up being estimated as 65K and dropped. So, we test against
102 * 'pbuf->tot_len' rather than the rounded-up packet size. However,
103 * 'pkt->pkt_rcvlen' itself is increased by the rounded-up packet size
104 * when enqueuing the packet, so that we still count the memory
105 * consumption (generally) conservatively, which is what we want.
107 return (pkt
->pkt_rcvlen
+ pbuf
->tot_len
<=
108 ipsock_get_rcvbuf(&pkt
->pkt_ipsock
));
112 * Check whether the given packet can and should be received on the given
113 * socket. If so, return the amount of space for ancillary information that
114 * will be necessary for the packet. If not, return a negative value.
117 pktsock_test_input(struct pktsock
* pkt
, struct pbuf
* pbuf
)
121 * This check will be done again in pktsock_input(), but this function
122 * is called for raw packets only (not for UDP packets) and, if this
123 * (cheap) check fails, we can avoid a (rather expensive) packet copy.
125 if (!pktsock_may_recv(pkt
, pbuf
))
128 if (ip_current_is_v6())
129 return (int)(sizeof(struct pktaddr6
) + sizeof(struct pkthdr
));
131 return (int)(sizeof(struct pktaddr4
) + sizeof(struct pkthdr
));
135 * A packet has arrived on a packet socket. We own the given packet buffer,
136 * and so we must free it if we do not want to keep it.
139 pktsock_input(struct pktsock
* pkt
, struct pbuf
* pbuf
,
140 const ip_addr_t
* srcaddr
, uint16_t port
)
142 struct pktaddr4 pktaddr4
;
143 struct pktaddr6 pktaddr6
;
144 struct pkthdr pkthdr
;
150 * We are going to mess with the packet's header and contents, so we
151 * must be the exclusive owner of the packet. For UDP packets, lwIP
152 * must have made a copy for us in case of non-exclusive delivery
153 * (e.g., multicast packets). For raw packets, we have made a copy of
154 * the packet ourselves just before the call to this function.
157 panic("input packet has multiple references!");
159 /* If the packet should not be received on this socket, drop it. */
160 if (!pktsock_may_recv(pkt
, pbuf
)) {
167 * Enqueue the packet. Overwrite the leading IP header with packet
168 * information that is used at the time of receipt by userland. The
169 * data structures are such that the information always fits in what
170 * was the IP header. The reference count check earlier ensures that
171 * we never overwrite part of a packet that is still in use elsewhere.
173 if (ip_current_is_v6()) {
174 assert(IP_IS_V6(srcaddr
));
175 assert(ip6_current_dest_addr() != NULL
);
177 ip6_addr_copy_to_packed(pktaddr6
.srcaddr
, *ip_2_ip6(srcaddr
));
178 ip6_addr_copy_to_packed(pktaddr6
.dstaddr
,
179 *ip6_current_dest_addr());
181 pktaddrlen
= sizeof(pktaddr6
);
183 assert(pktaddrlen
+ sizeof(pkthdr
) <= IP6_HLEN
);
185 pkthdr
.tos
= IP6H_TC(ip6_current_header());
186 pkthdr
.ttl
= IP6H_HOPLIM(ip6_current_header());
187 pkthdr
.flags
= PKTHF_IPV6
;
189 assert(IP_IS_V4(srcaddr
));
190 assert(ip4_current_dest_addr() != NULL
);
192 memcpy(&pktaddr4
.srcaddr
, ip_2_ip4(srcaddr
),
193 sizeof(pktaddr4
.srcaddr
));
194 memcpy(&pktaddr4
.dstaddr
, ip4_current_dest_addr(),
195 sizeof(pktaddr4
.srcaddr
));
197 pktaddrlen
= sizeof(pktaddr4
);
199 assert(pktaddrlen
+ sizeof(pkthdr
) <= IP_HLEN
);
201 pkthdr
.tos
= IPH_TOS(ip4_current_header());
202 pkthdr
.ttl
= IPH_TTL(ip4_current_header());
207 * Save both the interface on which the packet was received (for
208 * PKTINFO) and the interface that owns the destination address of the
209 * packet (for the source address's zone ID).
211 assert(ip_current_input_netif() != NULL
);
212 ifdev
= netif_get_ifdev(ip_current_input_netif());
213 pkthdr
.dstif
= (uint16_t)ifdev_get_index(ifdev
);
215 assert(ip_current_netif() != NULL
);
216 ifdev
= netif_get_ifdev(ip_current_netif());
217 pkthdr
.addrif
= (uint16_t)ifdev_get_index(ifdev
);
219 if ((pbuf
->flags
& PBUF_FLAG_LLMCAST
) ||
220 ip_addr_ismulticast(ip_current_dest_addr()))
221 pkthdr
.flags
|= PKTHF_MCAST
;
222 else if ((pbuf
->flags
& PBUF_FLAG_LLBCAST
) ||
223 ip_addr_isbroadcast(ip_current_dest_addr(), ip_current_netif()))
224 pkthdr
.flags
|= PKTHF_BCAST
;
228 util_pbuf_header(pbuf
, sizeof(pkthdr
));
230 memcpy(pbuf
->payload
, &pkthdr
, sizeof(pkthdr
));
232 util_pbuf_header(pbuf
, pktaddrlen
);
234 memcpy(pbuf
->payload
, pktaddr
, pktaddrlen
);
236 util_pbuf_header(pbuf
, -(int)(sizeof(pkthdr
) + pktaddrlen
));
238 *pkt
->pkt_rcvtailp
= pbuf
;
239 pkt
->pkt_rcvtailp
= pchain_end(pbuf
);
240 pkt
->pkt_rcvlen
+= pchain_size(pbuf
);
242 sockevent_raise(ipsock_get_sock(&pkt
->pkt_ipsock
), SEV_RECV
);
246 * Obtain interface and source address information for an outgoing packet. In
247 * particular, parse any IPV6_PKTINFO options provided as either sticky options
248 * on the socket 'pkt' or as ancillary options in the packet options 'pkto'.
249 * On success, return OK, with 'ifdevp' set to either the outgoing interface to
250 * use for the packet, or NULL if no outgoing interface was specified using
251 * either of the aforementioned options. If, and only if, 'ifdevp' is set to
252 * an actual interface (i.e., not NULL), then 'src_addrp' is filled with either
253 * a locally owned, validated, unicast address to use as source of the packet,
254 * or the unspecified ('any') address if no source address was specified using
255 * the options. On failure, return a negative error code.
258 pktsock_get_pktinfo(struct pktsock
* pkt
, struct pktopt
* pkto
,
259 struct ifdev
** ifdevp
, ip_addr_t
* src_addrp
)
261 struct ifdev
*ifdev
, *ifdev2
;
266 /* We support only IPV6_PKTINFO. IP_PKTINFO is not supported. */
267 if (!ipsock_is_ipv6(&pkt
->pkt_ipsock
)) {
273 * TODO: we are spending a lot of effort on initializing and copying
274 * stuff around, even just to find out whether there is anything to do
275 * at all here. See if this can be optimized.
277 ip_addr_set_zero_ip6(&ipaddr
);
280 * Ancillary data takes precedence over sticky options. We treat the
281 * source address and interface index fields as separate, overriding
282 * each earlier value only if non-zero. TODO: is that correct?
284 if (pkto
->pkto_flags
& PKTOF_PKTINFO
) {
285 memcpy(ip_2_ip6(&ipaddr
)->addr
, &pkto
->pkto_srcaddr
.addr
,
286 sizeof(ip_2_ip6(&ipaddr
)->addr
));
287 ifindex
= pkto
->pkto_ifindex
;
291 if (ip6_addr_isany(ip_2_ip6(&ipaddr
)))
292 memcpy(ip_2_ip6(&ipaddr
)->addr
, &pkt
->pkt_srcaddr
.addr
,
293 sizeof(ip_2_ip6(&ipaddr
)->addr
));
295 ifindex
= pkt
->pkt_ifindex
;
297 /* If both fields are blank, there is nothing more to do. */
298 if (ip6_addr_isany(ip_2_ip6(&ipaddr
)) && ifindex
== 0) {
303 /* If an interface index is specified, it must be valid. */
306 if (ifindex
!= 0 && (ifdev
= ifdev_get_by_index(ifindex
)) == NULL
)
310 * Use the interface index to set a zone on the source address, if the
311 * source address has a scope.
313 if (ip6_addr_has_scope(ip_2_ip6(&ipaddr
), IP6_UNKNOWN
)) {
315 return EADDRNOTAVAIL
;
317 ip6_addr_set_zone(ip_2_ip6(&ipaddr
), ifindex
);
321 * We need to validate the given address just as thoroughly as an
322 * address given through bind(). If we don't, we could allow forged
323 * source addresses etcetera. To be sure: this call may change the
324 * address to an IPv4 type address if needed.
326 if ((r
= ipsock_check_src_addr(pktsock_get_ipsock(pkt
), &ipaddr
,
327 FALSE
/*allow_mcast*/, &ifdev2
)) != OK
)
330 if (ifdev2
!= NULL
) {
333 else if (ifdev
!= ifdev2
)
334 return EADDRNOTAVAIL
;
337 * There should be no cases where the (non-multicast) address
338 * successfully parsed, is not unspecified, and yet did not map
339 * to an interface. Eliminate the possibility anyway by
340 * throwing an error for this case. As a result, we are left
341 * with one of two cases:
343 * 1) ifdevp is not NULL, and src_addrp is unspecified;
344 * 2) ifdevp is not NULL, and src_addrp is a locally assigned
347 * This is why we need not fill src_addrp when ifdevp is NULL.
349 if (!ip_addr_isany(&ipaddr
))
350 return EADDRNOTAVAIL
;
360 * Parse a chunk of user-provided control data, on an IPv4 socket provided as
361 * 'pkt'. The control chunk is given as 'cmsg', and the length of the data
362 * following the control header (possibly zero) is given as 'len'. On success,
363 * return OK, with any parsed options merged into the set of packet options
364 * 'pkto'. On failure, return a negative error code.
367 pktsock_parse_ctl_v4(struct pktsock
* pkt __unused
, struct cmsghdr
* cmsg
,
368 socklen_t len
, struct pktopt
* pkto
)
373 if (cmsg
->cmsg_level
!= IPPROTO_IP
)
376 switch (cmsg
->cmsg_type
) {
379 * Some userland code (bind's libisc in particular) supplies
380 * a single byte instead of a full integer for this option.
381 * We go out of our way to accept that format, too.
383 if (len
!= sizeof(val
) && len
!= sizeof(byte
))
386 if (len
== sizeof(byte
)) {
387 memcpy(&byte
, CMSG_DATA(cmsg
), sizeof(byte
));
390 memcpy(&val
, CMSG_DATA(cmsg
), sizeof(val
));
392 if (val
< 0 || val
> UINT8_MAX
)
395 pkto
->pkto_flags
|= PKTOF_TOS
;
396 pkto
->pkto_tos
= (uint8_t)val
;
401 if (len
!= sizeof(val
))
404 memcpy(&val
, CMSG_DATA(cmsg
), sizeof(val
));
406 if (val
< 0 || val
> UINT8_MAX
)
409 pkto
->pkto_flags
|= PKTOF_TTL
;
410 pkto
->pkto_ttl
= (uint8_t)val
;
415 * Implementing IP_PKTINFO might be a bit harder than its IPV6_PKTINFO
416 * sibling, because it would require the use of zone IDs (interface
417 * indices) for IPv4, which is not supported yet.
425 * Parse a chunk of user-provided control data, on an IPv6 socket provided as
426 * 'pkt'. The control chunk is given as 'cmsg', and the length of the data
427 * following the control header (possibly zero) is given as 'len'. On success,
428 * return OK, with any parsed options merged into the set of packet options
429 * 'pkto'. On failure, return a negative error code.
432 pktsock_parse_ctl_v6(struct pktsock
* pkt
, struct cmsghdr
* cmsg
,
433 socklen_t len
, struct pktopt
* pkto
)
435 struct in6_pktinfo ipi6
;
438 if (cmsg
->cmsg_level
!= IPPROTO_IPV6
)
441 switch (cmsg
->cmsg_type
) {
443 if (len
!= sizeof(val
))
446 memcpy(&val
, CMSG_DATA(cmsg
), sizeof(val
));
448 if (val
< -1 || val
> UINT8_MAX
)
454 pkto
->pkto_flags
|= PKTOF_TOS
;
455 pkto
->pkto_tos
= (uint8_t)val
;
460 if (len
!= sizeof(val
))
463 memcpy(&val
, CMSG_DATA(cmsg
), sizeof(val
));
465 if (val
< -1 || val
> UINT8_MAX
)
469 val
= IP_DEFAULT_TTL
;
471 pkto
->pkto_flags
|= PKTOF_TTL
;
472 pkto
->pkto_ttl
= (uint8_t)val
;
477 if (len
!= sizeof(ipi6
))
480 memcpy(&ipi6
, CMSG_DATA(cmsg
), sizeof(ipi6
));
482 pkto
->pkto_flags
|= PKTOF_PKTINFO
;
483 memcpy(&pkto
->pkto_srcaddr
.addr
, &ipi6
.ipi6_addr
,
484 sizeof(pkto
->pkto_srcaddr
.addr
));
485 pkto
->pkto_ifindex
= ipi6
.ipi6_ifindex
;
489 case IPV6_USE_MIN_MTU
:
490 if (len
!= sizeof(int))
493 memcpy(&val
, CMSG_DATA(cmsg
), sizeof(val
));
495 if (val
< -1 || val
> 1)
498 /* TODO: not supported by lwIP, but needed by applications. */
506 * Copy in and parse control data, as part of sending a packet on socket 'pkt'.
507 * The control data is accessible through 'ctl', with a user-provided length of
508 * 'ctl_len'. On success, return OK, with any parsed packet options stored in
509 * 'pkto'. On failure, return a negative error code.
512 pktsock_get_ctl(struct pktsock
* pkt
, const struct sockdriver_data
* ctl
,
513 socklen_t ctl_len
, struct pktopt
* pkto
)
515 struct msghdr msghdr
;
516 struct cmsghdr
*cmsg
;
520 /* The default: no packet options are being overridden. */
521 assert(pkto
->pkto_flags
== 0);
523 /* If no control length is given, we are done here. */
528 * For now, we put a rather aggressive limit on the size of the control
529 * data. We copy in and parse the whole thing in a single buffer.
531 if (ctl_len
> sizeof(pktsock_ctlbuf
)) {
532 printf("LWIP: too much control data given (%u bytes)\n",
538 if ((r
= sockdriver_copyin(ctl
, 0, pktsock_ctlbuf
, ctl_len
)) != OK
)
541 memset(&msghdr
, 0, sizeof(msghdr
));
542 msghdr
.msg_control
= pktsock_ctlbuf
;
543 msghdr
.msg_controllen
= ctl_len
;
545 for (cmsg
= CMSG_FIRSTHDR(&msghdr
); cmsg
!= NULL
;
546 cmsg
= CMSG_NXTHDR(&msghdr
, cmsg
)) {
547 /* Check for bogus lengths. */
548 assert((socklen_t
)((char *)cmsg
- pktsock_ctlbuf
) <= ctl_len
);
549 left
= ctl_len
- (socklen_t
)((char *)cmsg
- pktsock_ctlbuf
);
550 assert(left
>= CMSG_LEN(0)); /* guaranteed by CMSG_xxHDR */
552 if (cmsg
->cmsg_len
< CMSG_LEN(0) || cmsg
->cmsg_len
> left
) {
553 printf("LWIP: malformed control data rejected\n");
558 len
= cmsg
->cmsg_len
- CMSG_LEN(0);
560 if (ipsock_is_ipv6(&pkt
->pkt_ipsock
))
561 r
= pktsock_parse_ctl_v6(pkt
, cmsg
, len
, pkto
);
563 r
= pktsock_parse_ctl_v4(pkt
, cmsg
, len
, pkto
);
573 * Copy in the packet data from the calling user process, and store it in the
574 * buffer 'pbuf' that must already have been allocated with the appropriate
578 pktsock_get_data(struct pktsock
* pkt
, const struct sockdriver_data
* data
,
579 size_t len
, struct pbuf
* pbuf
)
583 return util_copy_data(data
, len
, 0, pbuf
, 0, TRUE
/*copy_in*/);
587 * Dequeue and free the head of the receive queue of a packet socket.
590 pktsock_dequeue(struct pktsock
* pkt
)
592 struct pbuf
*pbuf
, **pnext
;
595 pbuf
= pkt
->pkt_rcvhead
;
596 assert(pbuf
!= NULL
);
598 pnext
= pchain_end(pbuf
);
599 size
= pchain_size(pbuf
);
601 if ((pkt
->pkt_rcvhead
= *pnext
) == NULL
)
602 pkt
->pkt_rcvtailp
= &pkt
->pkt_rcvhead
;
604 assert(pkt
->pkt_rcvlen
>= size
);
605 pkt
->pkt_rcvlen
-= size
;
612 * Perform preliminary checks on a receive request.
615 pktsock_pre_recv(struct sock
* sock __unused
, endpoint_t user_endpt __unused
,
620 * We accept the same flags across all socket types in LWIP, and then
621 * simply ignore the ones we do not support for packet sockets.
623 if ((flags
& ~(MSG_PEEK
| MSG_WAITALL
)) != 0)
630 * Add a chunk of control data to the global control buffer, starting from
631 * offset 'off'. The chunk has the given level and type, and its data is given
632 * in the buffer 'ptr' with size 'len'. Return the (padded) size of the chunk
633 * that was generated as a result.
636 pktsock_add_ctl(int level
, int type
, void * ptr
, socklen_t len
, size_t off
)
641 size
= CMSG_SPACE(len
);
644 * The global control buffer must be large enough to store one chunk
645 * of each of the supported options. If this panic triggers, increase
646 * PKTSOCK_CTLBUF_SIZE by as much as needed.
648 if (off
+ size
> sizeof(pktsock_ctlbuf
))
649 panic("control buffer too small, increase "
650 "PKTSOCK_CTLBUF_SIZE");
652 memset(&cmsg
, 0, sizeof(cmsg
));
653 cmsg
.cmsg_len
= CMSG_LEN(len
);
654 cmsg
.cmsg_level
= level
;
655 cmsg
.cmsg_type
= type
;
658 * Clear any padding space. This can be optimized, but in any case we
659 * must be careful not to copy out any bytes that have not been
660 * initialized at all.
662 memset(&pktsock_ctlbuf
[off
], 0, size
);
664 memcpy(&pktsock_ctlbuf
[off
], &cmsg
, sizeof(cmsg
));
665 memcpy(CMSG_DATA((struct cmsghdr
*)&pktsock_ctlbuf
[off
]), ptr
, len
);
671 * Generate and copy out control data, as part of delivering a packet from
672 * socket 'pkt' to userland. The control data buffer is given as 'ctl', with
673 * a user-given length of 'ctl_len' bytes. The packet's header information is
674 * provided as 'pkthdr', and its source and destination addresses as 'pktaddr',
675 * which may be a pktaddr4 or pktaddr6 structure depending on the value of the
676 * PKTHF_IPV6 flag in the 'flags' field in 'pkthdr'. Note that we support
677 * dual-stack sockets, and as such it is possible that the socket is of domain
678 * AF_INET6 while the received packet is an IPv4 packet. On success, return
679 * the size of the control data copied out (possibly zero). If more control
680 * data were generated than copied out, also merge the MSG_CTRUNC flag into
681 * 'rflags'. On failure, return a negative error code.
684 pktsock_put_ctl(struct pktsock
* pkt
, const struct sockdriver_data
* ctl
,
685 socklen_t ctl_len
, struct pkthdr
* pkthdr
, void * pktaddr
,
688 struct pktaddr6
*pktaddr6
;
689 struct pktaddr4
*pktaddr4
;
690 struct in_pktinfo ipi
;
691 struct in6_pktinfo ipi6
;
698 flags
= ipsock_get_flags(&pkt
->pkt_ipsock
);
700 if (!(flags
& (PKTF_RECVINFO
| PKTF_RECVTOS
| PKTF_RECVTTL
)))
704 * Important: all generated control chunks must fit in the global
705 * control buffer together. When adding more options here, ensure that
706 * the control buffer remains large enough to receive all options at
707 * once. See also the panic in pktsock_add_ctl().
712 * IPv6 sockets may receive IPv4 packets. The ancillary data is in the
713 * format corresponding to the socket, which means we may have to
714 * convert any IPv4 addresses from the packet to IPv4-mapped IPv6
715 * addresses for the ancillary data, just like the source address.
717 if (ipsock_is_ipv6(&pkt
->pkt_ipsock
)) {
718 if (flags
& PKTF_RECVTTL
) {
721 off
+= pktsock_add_ctl(IPPROTO_IPV6
, IPV6_HOPLIMIT
,
722 &val
, sizeof(val
), off
);
725 if (flags
& PKTF_RECVTOS
) {
728 off
+= pktsock_add_ctl(IPPROTO_IPV6
, IPV6_TCLASS
, &val
,
732 if (flags
& PKTF_RECVINFO
) {
733 memset(&ipi6
, 0, sizeof(ipi6
));
735 if (pkthdr
->flags
& PKTHF_IPV6
) {
736 pktaddr6
= (struct pktaddr6
*)pktaddr
;
737 memcpy(&ipi6
.ipi6_addr
, &pktaddr6
->dstaddr
,
738 sizeof(ipi6
.ipi6_addr
));
740 pktaddr4
= (struct pktaddr4
*)pktaddr
;
742 addr_make_v4mapped_v6(&ipaddr
,
745 memcpy(&ipi6
.ipi6_addr
,
746 ip_2_ip6(&ipaddr
)->addr
,
747 sizeof(ipi6
.ipi6_addr
));
749 ipi6
.ipi6_ifindex
= pkthdr
->dstif
;
751 off
+= pktsock_add_ctl(IPPROTO_IPV6
, IPV6_PKTINFO
,
752 &ipi6
, sizeof(ipi6
), off
);
755 if (flags
& PKTF_RECVTTL
) {
758 off
+= pktsock_add_ctl(IPPROTO_IP
, IP_TTL
, &byte
,
762 if (flags
& PKTF_RECVINFO
) {
763 assert(!(pkthdr
->flags
& PKTHF_IPV6
));
764 pktaddr4
= (struct pktaddr4
*)pktaddr
;
766 memset(&ipi
, 0, sizeof(ipi
));
767 memcpy(&ipi
.ipi_addr
, &pktaddr4
->dstaddr
,
768 sizeof(ipi
.ipi_addr
));
769 ipi
.ipi_ifindex
= pkthdr
->dstif
;
771 off
+= pktsock_add_ctl(IPPROTO_IP
, IP_PKTINFO
, &ipi
,
781 *rflags
|= MSG_CTRUNC
;
784 (r
= sockdriver_copyout(ctl
, 0, pktsock_ctlbuf
, ctl_len
)) != OK
)
791 * Receive data on a packet socket.
794 pktsock_recv(struct sock
* sock
, const struct sockdriver_data
* data
,
795 size_t len
, size_t * off
, const struct sockdriver_data
* ctl
,
796 socklen_t ctl_len
, socklen_t
* ctl_off
, struct sockaddr
* addr
,
797 socklen_t
* addr_len
, endpoint_t user_endpt __unused
, int flags
,
798 size_t min __unused
, int * rflags
)
800 struct pktsock
*pkt
= (struct pktsock
*)sock
;
801 struct pktaddr4 pktaddr4
;
802 struct pktaddr6 pktaddr6
;
803 struct pkthdr pkthdr
;
809 if ((pbuf
= pkt
->pkt_rcvhead
) == NULL
)
813 * Get the ancillary data for the packet. The format of the ancillary
814 * data depends on the received packet type, which may be different
815 * from the socket type.
817 util_pbuf_header(pbuf
, sizeof(pkthdr
));
819 memcpy(&pkthdr
, pbuf
->payload
, sizeof(pkthdr
));
821 if (pkthdr
.flags
& PKTHF_IPV6
) {
822 util_pbuf_header(pbuf
, sizeof(pktaddr6
));
824 memcpy(&pktaddr6
, pbuf
->payload
, sizeof(pktaddr6
));
827 ip_addr_copy_from_ip6_packed(srcaddr
, pktaddr6
.srcaddr
);
828 if (ip6_addr_has_scope(ip_2_ip6(&srcaddr
), IP6_UNICAST
))
829 ip6_addr_set_zone(ip_2_ip6(&srcaddr
), pkthdr
.addrif
);
831 util_pbuf_header(pbuf
,
832 -(int)(sizeof(pkthdr
) + sizeof(pktaddr6
)));
834 util_pbuf_header(pbuf
, sizeof(pktaddr4
));
836 memcpy(&pktaddr4
, pbuf
->payload
, sizeof(pktaddr4
));
839 ip_addr_copy_from_ip4(srcaddr
, pktaddr4
.srcaddr
);
841 util_pbuf_header(pbuf
,
842 -(int)(sizeof(pkthdr
) + sizeof(pktaddr4
)));
845 /* Copy out the packet data to the calling user process. */
846 if (len
>= pbuf
->tot_len
)
849 *rflags
|= MSG_TRUNC
;
851 r
= util_copy_data(data
, len
, 0, pbuf
, 0, FALSE
/*copy_in*/);
856 /* Generate and copy out ancillary (control) data, if requested. */
857 if ((r
= pktsock_put_ctl(pkt
, ctl
, ctl_len
, &pkthdr
, pktaddr
,
861 /* Store the source IP address. */
862 ipsock_put_addr(&pkt
->pkt_ipsock
, addr
, addr_len
, &srcaddr
,
865 /* Set multicast or broadcast message flag, if applicable. */
866 if (pkthdr
.flags
& PKTHF_MCAST
)
867 *rflags
|= MSG_MCAST
;
868 else if (pkthdr
.flags
& PKTHF_BCAST
)
869 *rflags
|= MSG_BCAST
;
871 /* Discard the packet now, unless we were instructed to peek only. */
872 if (!(flags
& MSG_PEEK
))
873 pktsock_dequeue(pkt
);
875 /* Return the received part of the packet length. */
882 * Test whether data can be received on a packet socket, and if so, how many
886 pktsock_test_recv(struct sock
* sock
, size_t min __unused
, size_t * size
)
888 struct pktsock
*pkt
= (struct pktsock
*)sock
;
890 if (pkt
->pkt_rcvhead
== NULL
)
894 *size
= pkt
->pkt_rcvhead
->tot_len
;
899 * The caller has performed a multicast operation on the given socket. Thus,
900 * the caller is multicast aware. Remember this, because that means the socket
901 * may also receive traffic to multicast destinations.
904 pktsock_set_mcaware(struct pktsock
* pkt
)
907 ipsock_set_flag(&pkt
->pkt_ipsock
, PKTF_MCAWARE
);
911 * Set socket options on a packet socket.
914 pktsock_setsockopt(struct pktsock
* pkt
, int level
, int name
,
915 const struct sockdriver_data
* data
, socklen_t len
,
916 struct ipopts
* ipopts
)
919 struct ipv6_mreq ipv6mr
;
920 struct in6_pktinfo ipi6
;
921 ip_addr_t ipaddr
, ifaddr
;
925 int r
, val
, has_scope
;
929 if (ipsock_is_ipv6(&pkt
->pkt_ipsock
))
933 case IP_ADD_MEMBERSHIP
:
934 case IP_DROP_MEMBERSHIP
:
935 pktsock_set_mcaware(pkt
);
937 if ((r
= sockdriver_copyin_opt(data
, &imr
, sizeof(imr
),
941 ip_addr_set_ip4_u32(&ipaddr
, imr
.imr_multiaddr
.s_addr
);
942 ip_addr_set_ip4_u32(&ifaddr
, imr
.imr_interface
.s_addr
);
944 if (!ip_addr_isany(&ifaddr
)) {
945 ifdev
= ifaddr_map_by_addr(&ifaddr
);
948 return EADDRNOTAVAIL
;
952 if (name
== IP_ADD_MEMBERSHIP
)
953 r
= mcast_join(&pkt
->pkt_mcast
, &ipaddr
,
956 r
= mcast_leave(&pkt
->pkt_mcast
, &ipaddr
,
963 if ((r
= sockdriver_copyin_opt(data
, &val
, sizeof(val
),
968 case IP_RECVTTL
: flag
= PKTF_RECVTTL
; break;
969 case IP_RECVPKTINFO
: flag
= PKTF_RECVINFO
; break;
970 default: flag
= 0; assert(0); break;
974 ipsock_set_flag(&pkt
->pkt_ipsock
, flag
);
976 ipsock_clear_flag(&pkt
->pkt_ipsock
, flag
);
984 if (!ipsock_is_ipv6(&pkt
->pkt_ipsock
))
988 case IPV6_JOIN_GROUP
:
989 case IPV6_LEAVE_GROUP
:
990 pktsock_set_mcaware(pkt
);
992 if ((r
= sockdriver_copyin_opt(data
, &ipv6mr
,
993 sizeof(ipv6mr
), len
)) != OK
)
996 ip_addr_set_zero_ip6(&ipaddr
);
997 memcpy(ip_2_ip6(&ipaddr
)->addr
,
998 &ipv6mr
.ipv6mr_multiaddr
,
999 sizeof(ip_2_ip6(&ipaddr
)->addr
));
1002 * We currently do not support joining IPv4 multicast
1003 * groups on IPv6 sockets. The reason for this is that
1004 * this would require decisions on what to do if the
1005 * socket is set to V6ONLY later, as well as various
1006 * additional exceptions for a case that hopefully
1007 * doesn't occur in practice anyway.
1009 if (ip6_addr_isipv4mappedipv6(ip_2_ip6(&ipaddr
)))
1010 return EADDRNOTAVAIL
;
1012 has_scope
= ip6_addr_has_scope(ip_2_ip6(&ipaddr
),
1015 if ((ifindex
= ipv6mr
.ipv6mr_interface
) != 0) {
1016 ifdev
= ifdev_get_by_index(ifindex
);
1022 ip6_addr_set_zone(ip_2_ip6(&ipaddr
),
1026 return EADDRNOTAVAIL
;
1031 if (name
== IPV6_JOIN_GROUP
)
1032 r
= mcast_join(&pkt
->pkt_mcast
, &ipaddr
,
1035 r
= mcast_leave(&pkt
->pkt_mcast
, &ipaddr
,
1040 case IPV6_USE_MIN_MTU
:
1041 if ((r
= sockdriver_copyin_opt(data
, &val
, sizeof(val
),
1045 if (val
< -1 || val
> 1)
1049 * lwIP does not support path MTU discovery, so do
1050 * nothing. TODO: see if this is actually good enough.
1055 if ((r
= sockdriver_copyin_opt(data
, &ipi6
,
1056 sizeof(ipi6
), len
)) != OK
)
1060 * Simply copy in what is given. The values will be
1061 * parsed only once a packet is sent, in
1062 * pktsock_get_pktinfo(). Otherwise, if we perform
1063 * checks here, they may be outdated by the time the
1064 * values are actually used.
1066 memcpy(&pkt
->pkt_srcaddr
.addr
, &ipi6
.ipi6_addr
,
1067 sizeof(pkt
->pkt_srcaddr
.addr
));
1068 pkt
->pkt_ifindex
= ipi6
.ipi6_ifindex
;
1072 case IPV6_RECVPKTINFO
:
1073 case IPV6_RECVHOPLIMIT
:
1074 case IPV6_RECVTCLASS
:
1075 if ((r
= sockdriver_copyin_opt(data
, &val
, sizeof(val
),
1080 case IPV6_RECVPKTINFO
: flag
= PKTF_RECVINFO
; break;
1081 case IPV6_RECVHOPLIMIT
: flag
= PKTF_RECVTTL
; break;
1082 case IPV6_RECVTCLASS
: flag
= PKTF_RECVTOS
; break;
1083 default: flag
= 0; assert(0); break;
1087 ipsock_set_flag(&pkt
->pkt_ipsock
, flag
);
1089 ipsock_clear_flag(&pkt
->pkt_ipsock
, flag
);
1097 return ipsock_setsockopt(&pkt
->pkt_ipsock
, level
, name
, data
, len
,
1102 * Retrieve socket options on a packet socket.
1105 pktsock_getsockopt(struct pktsock
* pkt
, int level
, int name
,
1106 const struct sockdriver_data
* data
, socklen_t
* len
,
1107 struct ipopts
* ipopts
)
1109 struct in6_pktinfo ipi6
;
1115 if (ipsock_is_ipv6(&pkt
->pkt_ipsock
))
1120 case IP_RECVPKTINFO
:
1122 case IP_RECVTTL
: flag
= PKTF_RECVTTL
; break;
1123 case IP_RECVPKTINFO
: flag
= PKTF_RECVINFO
; break;
1124 default: flag
= 0; assert(0); break;
1127 val
= !!(ipsock_get_flag(&pkt
->pkt_ipsock
, flag
));
1129 return sockdriver_copyout_opt(data
, &val
, sizeof(val
),
1136 if (!ipsock_is_ipv6(&pkt
->pkt_ipsock
))
1140 case IPV6_USE_MIN_MTU
:
1142 * TODO: sort out exactly what lwIP actually supports
1143 * in the way of path MTU discovery. Value 1 means
1144 * that path MTU discovery is disabled and packets are
1145 * sent at the minimum MTU (RFC 3542).
1149 return sockdriver_copyout_opt(data
, &val
, sizeof(val
),
1153 memset(&ipi6
, 0, sizeof(ipi6
));
1156 * Simply copy out whatever was given before. These
1157 * fields are initialized to zero on socket creation.
1159 memcpy(&ipi6
.ipi6_addr
, &pkt
->pkt_srcaddr
.addr
,
1160 sizeof(ipi6
.ipi6_addr
));
1161 ipi6
.ipi6_ifindex
= pkt
->pkt_ifindex
;
1163 return sockdriver_copyout_opt(data
, &ipi6
,
1166 case IPV6_RECVPKTINFO
:
1167 case IPV6_RECVHOPLIMIT
:
1168 case IPV6_RECVTCLASS
:
1170 case IPV6_RECVPKTINFO
: flag
= PKTF_RECVINFO
; break;
1171 case IPV6_RECVHOPLIMIT
: flag
= PKTF_RECVTTL
; break;
1172 case IPV6_RECVTCLASS
: flag
= PKTF_RECVTOS
; break;
1173 default: flag
= 0; assert(0); break;
1176 val
= !!(ipsock_get_flag(&pkt
->pkt_ipsock
, flag
));
1178 return sockdriver_copyout_opt(data
, &val
, sizeof(val
),
1185 return ipsock_getsockopt(&pkt
->pkt_ipsock
, level
, name
, data
, len
,
1190 * Drain the receive queue of a packet socket.
1193 pktsock_drain(struct pktsock
* pkt
)
1196 while (pkt
->pkt_rcvhead
!= NULL
)
1197 pktsock_dequeue(pkt
);
1199 assert(pkt
->pkt_rcvlen
== 0);
1200 assert(pkt
->pkt_rcvtailp
== &pkt
->pkt_rcvhead
);
1204 * Shut down a packet socket for reading and/or writing.
1207 pktsock_shutdown(struct pktsock
* pkt
, unsigned int mask
)
1210 if (mask
& SFL_SHUT_RD
)
1215 * Close a packet socket.
1218 pktsock_close(struct pktsock
* pkt
)
1223 mcast_leave_all(&pkt
->pkt_mcast
);
1227 * Return the rounded-up number of bytes in the packet socket's receive queue,
1228 * for sysctl(7). NetBSD returns the used portion of each buffer, but that
1229 * would be quite some extra effort for us (TODO).
1232 pktsock_get_recvlen(struct pktsock
* pkt
)
1235 return pkt
->pkt_rcvlen
;