Sync with cat.c from netbsd-8
[minix3.git] / minix / net / lwip / ethif.c
blob863b12e4893b3d13c2332cf99f32e84ac9df5698
1 /* LWIP service - ethif.c - ethernet interfaces */
2 /*
3 * The most important aspect of this module is to maintain a send queue for the
4 * interface. This send queue consists of packets to send. At times, the user
5 * may request a change to the driver configuration. While configuration
6 * requests would ideally be enqueued in the send queue, this has proven too
7 * problematic to work in practice, especially since out-of-memory conditions
8 * may prevent configuration requests from being accepted immediately in such a
9 * model. Instead, we take a simple and blunt approach: configuration requests
10 * "cut in line" and thus take precedence over pending packets in the send
11 * queue. This may not always be entirely correct: for example, packets may be
12 * transmitted with the old ethernet address after the network device has
13 * already been reconfigured to receive from a new ethernet address. However,
14 * this should not be a real problem, and we take care explicitly of perhaps
15 * the most problematic case: packets not getting checksummed due to checksum
16 * offloading configuration changes.
18 * Even with this blunt approach, we maintain three concurrent configurations:
19 * the active, the pending, and the wanted configuration. The active one is
20 * the last known active configuration at the network driver. It is used not only
21 * to report whether the device is in RUNNING state, but also to replay the
22 * active configuration to a restarted driver. The pending configuration is
23 * a partially new configuration that has been given to ndev to send to the
24 * driver, but not yet acknowledged by the driver. Finally, the wanted
25 * configuration is the latest one that has yet to be given to ndev.
27 * Each configuration has a bitmask indicating which part of the configuration
28 * has changed, in order to limit work on the driver side. This is also the
29 * reason that the pending and wanted configurations are separate: if e.g. a
30 * media change is pending at the driver, and the user also requests a mode
31 * change, we do not want the media change to be repeated after it has been
32 * acknowledged by the driver, just to change the mode as well. In this example
33 * the pending configuration will have NDEV_SET_MEDIA set, and the wanted
34 * configuration will have NDEV_SET_MODE set. Once acknowledged, the pending
35 * bitmask is cleared and the wanted bitmask is tested to see if another
36 * configuration change should be given to ndev. Technically, this could lead
37 * to starvation of actual packet transmission, but we expect configuration
38 * changes to be very rare, since they are always user initiated.
40 * It is important to note for understanding the code that for some fields
41 * (mode, flags, caps), the three configurations are cascading: even though the
42 * wanted configuration may not have NDEV_SET_MODE set, its mode field will
43 * still contain the most recently requested mode; that is, the mode in the
44 * pending configuration if that one has NDEV_SET_MODE set, or otherwise the
45 * mode in the active configuration. For that reason, we carefully merge
46 * configuration requests into the next level (wanted -> pending -> active),
47 * updating just the fields that have been changed by the previous level. This
48 * approach simplifies obtaining current values a lot, but is not very obvious.
50 * Also, we never send multiple configuration requests at once, even though
51 * ndev would let us do that: we use a single array for the list of multicast
52 * ethernet addresses that we send to the driver, which the driver may retrieve
53 * (using a memory grant) at any time. We necessarily recompute the multicast
54 * list before sending a configuration request, and thus, sending multiple
55 * requests at once may lead to the driver retrieving a corrupted list.
58 #include "lwip.h"
59 #include "ethif.h"
61 #include "lwip/etharp.h"
62 #include "lwip/ethip6.h"
63 #include "lwip/igmp.h"
64 #include "lwip/mld6.h"
66 #include <net/if_media.h>
68 #define ETHIF_MAX_MTU 1500 /* maximum MTU value for ethernet */
69 #define ETHIF_DEF_MTU ETHIF_MAX_MTU /* default MTU value that we use */
71 #define ETHIF_MCAST_MAX 8 /* maximum number of multicast addresses */
/*
 * Per-interface state for one ethernet network device, together with the
 * static array that provides the storage for all such objects.  Code in this
 * module casts ifdev pointers directly to ethif pointers, which is why the
 * embedded ifdev object has to stay the first member.
 */
struct ethif {
	struct ifdev ethif_ifdev;	/* interface device, MUST be first */
	ndev_id_t ethif_ndev;		/* network device ID */
	unsigned int ethif_flags;	/* interface flags (ETHIFF_) */
	uint32_t ethif_caps;		/* driver capabilities (NDEV_CAPS_) */
	uint32_t ethif_media;		/* driver-reported media type (IFM_) */
	struct ndev_conf ethif_active;	/* active configuration (at driver) */
	struct ndev_conf ethif_pending;	/* pending configuration (at ndev) */
	struct ndev_conf ethif_wanted;	/* desired configuration (waiting) */
	/* Multicast address list handed to the driver through nconf_mclist. */
	struct ndev_hwaddr ethif_mclist[ETHIF_MCAST_MAX];	/* multicast list */
	struct {			/* send queue (packet/conf refs) */
		struct pbuf *es_head;	/* first (oldest) request reference */
		/* Equals &es_head when every queued request is still unsent. */
		struct pbuf **es_unsentp; /* ptr-ptr to first unsent request */
		struct pbuf **es_tailp;	/* ptr-ptr for adding new requests */
		unsigned int es_count;	/* buffer count, see ETHIF_PBUF_.. */
	} ethif_snd;
	struct {			/* receive queue (packets) */
		struct pbuf *er_head;	/* first (oldest) request buffer */
		struct pbuf **er_tailp;	/* ptr-ptr for adding new requests */
	} ethif_rcv;
	/* Links unused objects together on ethif_freelist. */
	SIMPLEQ_ENTRY(ethif) ethif_next; /* next in free list */
} ethif_array[NR_NDEV];	/* any other value would be suboptimal */
96 #define ethif_get_name(ethif) (ifdev_get_name(&(ethif)->ethif_ifdev))
97 #define ethif_get_netif(ethif) (ifdev_get_netif(&(ethif)->ethif_ifdev))
99 #define ETHIFF_DISABLED 0x01 /* driver has disappeared */
100 #define ETHIFF_FIRST_CONF 0x02 /* first configuration request sent */
103 * Send queue limit settings. Both are counted in number of pbuf objects.
104 * ETHIF_PBUF_MIN is the minimum number of pbuf objects that can always be
105 * enqueued on a particular interface's send queue. It should be at least the
106 * number of pbufs for one single packet after being reduced to the ndev limit,
107 * so NDEV_IOV_MAX (8) is a natural fit. The ETHIF_PBUF_MAX_n values define
108 * the maximum number of pbufs that may be used by all interface send queues
109 * combined, whichever of the two is smaller. The resulting number must be set
110 * fairly high, because at any time there may be a lot of active TCP sockets
111 * that all generate a (multi-pbuf) packet as a result of a clock tick. It is
112 * currently a function of the size of the buffer pool, capped to a value that
113 * is a function of the number of TCP sockets (assuming one packet per socket;
114 * up to MSS/BUFSIZE+1 data pbufs, one header pbuf, one extra as margin). The
115 * difference between the per-interface guaranteed minimum and the global
116 * maximum is what makes up a pool of "spares", which are really just tokens
117 * allowing for enqueuing of that many pbufs.
119 #define ETHIF_PBUF_MIN (NDEV_IOV_MAX)
120 #define ETHIF_PBUF_MAX_1 (mempool_cur_buffers() >> 1)
121 #define ETHIF_PBUF_MAX_2 (NR_TCPSOCK * (TCP_MSS / MEMPOOL_BUFSIZE + 3))
123 static unsigned int ethif_spares;
125 static SIMPLEQ_HEAD(, ethif) ethif_freelist; /* free ethif objects */
127 static const struct ifdev_ops ethif_ops;
129 #ifdef INET6
130 static ip6_addr_t ethif_ip6addr_allnodes_ll;
131 #endif /* INET6 */
134 * Initialize the ethernet interfaces module.
136 void
137 ethif_init(void)
139 unsigned int slot;
141 /* Initialize the list of free ethif objects. */
142 SIMPLEQ_INIT(&ethif_freelist);
144 for (slot = 0; slot < __arraycount(ethif_array); slot++)
145 SIMPLEQ_INSERT_TAIL(&ethif_freelist, &ethif_array[slot],
146 ethif_next);
148 /* Initialize the number of in-use spare tokens. */
149 ethif_spares = 0;
151 #ifdef INET6
152 /* Preinitialize the link-local all-nodes IPv6 multicast address. */
153 ip6_addr_set_allnodes_linklocal(&ethif_ip6addr_allnodes_ll);
154 #endif /* INET6 */
158 * As the result of some event, the NetBSD-style interface flags for this
159 * interface may have changed. Recompute and update the flags as appropriate.
161 static void
162 ethif_update_ifflags(struct ethif * ethif)
164 unsigned int ifflags;
166 ifflags = ifdev_get_ifflags(&ethif->ethif_ifdev);
168 /* These are the flags that we might update here. */
169 ifflags &= ~(IFF_RUNNING | IFF_ALLMULTI);
172 * For us, the RUNNING flag indicates that -as far as we know- the
173 * network device is fully operational and has its I/O engines running.
174 * This is a reflection of the current state, not of any intention, so
175 * we look at the active configuration here. We use the same approach
176 * for one other receive state flags here (ALLMULTI).
178 if ((ethif->ethif_flags &
179 (ETHIFF_DISABLED | ETHIFF_FIRST_CONF)) == 0 &&
180 ethif->ethif_active.nconf_mode != NDEV_MODE_DOWN) {
181 ifflags |= IFF_RUNNING;
183 if (ethif->ethif_active.nconf_mode & NDEV_MODE_MCAST_ALL)
184 ifflags |= IFF_ALLMULTI;
187 ifdev_update_ifflags(&ethif->ethif_ifdev, ifflags);
191 * Add a multicast hardware receive address into the set of hardware addresses
192 * in the given configuration, if the given address is not already in the
193 * configuration's set. Adjust the configuration's mode as needed. Return
194 * TRUE If the address was added, and FALSE if the address could not be added
195 * due to a full list (of 'max' elements), in which case the mode is changed
196 * from receiving from listed multicast addresses to receiving from all
197 * multicast addresses.
199 static int
200 ethif_add_mcast(struct ndev_conf * nconf, unsigned int max,
201 struct ndev_hwaddr * hwaddr)
203 unsigned int slot;
206 * See if the hardware address is already in the list we produced so
207 * far. This makes the multicast list generation O(n^2) but we do not
208 * expect many entries nor is the list size large anyway.
210 for (slot = 0; slot < nconf->nconf_mccount; slot++)
211 if (!memcmp(&nconf->nconf_mclist[slot], hwaddr,
212 sizeof(*hwaddr)))
213 return TRUE;
215 if (nconf->nconf_mccount < max) {
216 memcpy(&nconf->nconf_mclist[slot], hwaddr, sizeof(*hwaddr));
217 nconf->nconf_mccount++;
219 nconf->nconf_mode |= NDEV_MODE_MCAST_LIST;
221 return TRUE;
222 } else {
223 nconf->nconf_mode &= ~NDEV_MODE_MCAST_LIST;
224 nconf->nconf_mode |= NDEV_MODE_MCAST_ALL;
226 return FALSE;
231 * Add the ethernet hardware address derived from the given IPv4 multicast
232 * address, to the list of multicast addresses.
234 static int
235 ethif_add_mcast_v4(struct ndev_conf * nconf, unsigned int max,
236 const ip4_addr_t * ip4addr)
238 struct ndev_hwaddr hwaddr;
240 /* 01:00:05:xx:xx:xx with the lower 23 bits of the IPv4 address. */
241 hwaddr.nhwa_addr[0] = LL_IP4_MULTICAST_ADDR_0;
242 hwaddr.nhwa_addr[1] = LL_IP4_MULTICAST_ADDR_1;
243 hwaddr.nhwa_addr[2] = LL_IP4_MULTICAST_ADDR_2;
244 hwaddr.nhwa_addr[3] = (ip4_addr_get_u32(ip4addr) >> 16) & 0x7f;
245 hwaddr.nhwa_addr[4] = (ip4_addr_get_u32(ip4addr) >> 8) & 0xff;
246 hwaddr.nhwa_addr[5] = (ip4_addr_get_u32(ip4addr) >> 0) & 0xff;
248 return ethif_add_mcast(nconf, max, &hwaddr);
252 * Add the ethernet hardware address derived from the given IPv6 multicast
253 * address, to the list of multicast addresses.
255 static int
256 ethif_add_mcast_v6(struct ndev_conf * nconf, unsigned int max,
257 const ip6_addr_t * ip6addr)
259 struct ndev_hwaddr hwaddr;
261 /* 33:33:xx:xx:xx:xx with the lower 32 bits of the IPv6 address. */
262 hwaddr.nhwa_addr[0] = LL_IP6_MULTICAST_ADDR_0;
263 hwaddr.nhwa_addr[1] = LL_IP6_MULTICAST_ADDR_1;
264 memcpy(&hwaddr.nhwa_addr[2], &ip6addr->addr[3], sizeof(uint32_t));
266 return ethif_add_mcast(nconf, max, &hwaddr);
270 * Set up the multicast mode for a configuration that is to be sent to a
271 * network driver, generating a multicast receive address list for the driver
272 * as applicable.
274 static void
275 ethif_gen_mcast(struct ethif * ethif, struct ndev_conf * nconf)
277 struct igmp_group *group4;
278 struct mld_group *group6;
279 unsigned int max;
281 /* Make sure that multicast is supported at all for this interface. */
282 if (!(ethif->ethif_caps & NDEV_CAP_MCAST))
283 return;
285 /* Make sure the mode is being (re)configured to be up. */
286 if (!(nconf->nconf_set & NDEV_SET_MODE) ||
287 nconf->nconf_mode == NDEV_MODE_DOWN)
288 return;
290 /* Recompute the desired multicast flags. */
291 nconf->nconf_mode &= ~(NDEV_MODE_MCAST_LIST | NDEV_MODE_MCAST_ALL);
293 /* If promiscuous mode is enabled, receive all multicast packets. */
294 if (nconf->nconf_mode & NDEV_MODE_PROMISC) {
295 nconf->nconf_mode |= NDEV_MODE_MCAST_ALL;
297 return;
301 * Map all IGMP/MLD6 multicast addresses to ethernet addresses, merging
302 * any duplicates to save slots. We have to add the MLD6 all-nodes
303 * multicast address ourselves, which also means the list is never
304 * empty unless compiling with USE_INET6=no. If the list is too small
305 * for all addresses, opt to receive all multicast packets instead.
307 nconf->nconf_mclist = ethif->ethif_mclist;
308 nconf->nconf_mccount = 0;
309 max = __arraycount(ethif->ethif_mclist);
311 for (group4 = netif_igmp_data(ethif_get_netif(ethif)); group4 != NULL;
312 group4 = group4->next)
313 if (!ethif_add_mcast_v4(nconf, max, &group4->group_address))
314 return;
316 #ifdef INET6
317 if (!ethif_add_mcast_v6(nconf, max, &ethif_ip6addr_allnodes_ll))
318 return;
319 #endif /* INET6 */
321 for (group6 = netif_mld6_data(ethif_get_netif(ethif)); group6 != NULL;
322 group6 = group6->next)
323 if (!ethif_add_mcast_v6(nconf, max, &group6->group_address))
324 return;
328 * Merge a source configuration into a destination configuration, copying any
329 * fields intended to be set from the source into the destination and clearing
330 * the "set" mask in the source, without changing the source fields, so that
331 * the source will reflect the destination's contents.
333 static void
334 ethif_merge_conf(struct ndev_conf * dconf, struct ndev_conf * sconf)
337 dconf->nconf_set |= sconf->nconf_set;
339 if (sconf->nconf_set & NDEV_SET_MODE)
340 dconf->nconf_mode = sconf->nconf_mode;
341 if (sconf->nconf_set & NDEV_SET_CAPS)
342 dconf->nconf_caps = sconf->nconf_caps;
343 if (sconf->nconf_set & NDEV_SET_FLAGS)
344 dconf->nconf_flags = sconf->nconf_flags;
345 if (sconf->nconf_set & NDEV_SET_MEDIA)
346 dconf->nconf_media = sconf->nconf_media;
347 if (sconf->nconf_set & NDEV_SET_HWADDR)
348 memcpy(&dconf->nconf_hwaddr, &sconf->nconf_hwaddr,
349 sizeof(dconf->nconf_hwaddr));
351 sconf->nconf_set = 0;
355 * Return TRUE if we can and should try to pass a configuration request to the
356 * ndev layer on this interface, or FALSE otherwise.
358 static int
359 ethif_can_conf(struct ethif * ethif)
362 /* Is there a configuration change waiting? The common case is no. */
363 if (ethif->ethif_wanted.nconf_set == 0)
364 return FALSE;
367 * Is there a configuration change pending already? Then wait for it
368 * to be acknowledged first.
370 if (ethif->ethif_pending.nconf_set != 0)
371 return FALSE;
373 /* Make sure the interface is in the appropriate state. */
374 if (ethif->ethif_flags & ETHIFF_DISABLED)
375 return FALSE;
377 /* First let all current packet send requests finish. */
378 return (ethif->ethif_snd.es_unsentp == &ethif->ethif_snd.es_head);
382 * Return TRUE if we can and should try to pass the next unsent packet send
383 * request to the ndev layer on this interface, or FALSE otherwise.
385 static int
386 ethif_can_send(struct ethif * ethif)
389 /* Is there anything to hand to ndev at all? The common case is no. */
390 if (*ethif->ethif_snd.es_unsentp == NULL)
391 return FALSE;
394 * Is there a configuration change pending? Then we cannot send
395 * packets yet. Always let all configuration changes through first.
397 if (ethif->ethif_pending.nconf_set != 0 ||
398 ethif->ethif_wanted.nconf_set != 0)
399 return FALSE;
401 /* Make sure the interface is in the appropriate state. */
402 if ((ethif->ethif_flags & (ETHIFF_DISABLED | ETHIFF_FIRST_CONF)) != 0)
403 return FALSE;
405 return TRUE;
409 * Return TRUE if we can and should try to receive packets on this interface
410 * and are ready to accept received packets, or FALSE otherwise.
412 static int
413 ethif_can_recv(struct ethif * ethif)
416 if ((ethif->ethif_flags & (ETHIFF_DISABLED | ETHIFF_FIRST_CONF)) != 0)
417 return FALSE;
420 * We do not check the link status here. There is no reason not to
421 * spawn receive requests, or accept received packets, while the link
422 * is reported to be down.
424 return ifdev_is_up(&ethif->ethif_ifdev);
428 * Polling function, invoked after each message loop iteration. Check whether
429 * any configuration change or packets can be sent to the driver, and whether
430 * any new packet receive requests can be enqueued at the driver.
432 static void
433 ethif_poll(struct ifdev * ifdev)
435 struct ethif *ethif = (struct ethif *)ifdev;
436 struct pbuf *pbuf, *pref;
439 * If a configuration request is desired, see if we can send it to the
440 * driver now. Otherwise, attempt to send any packets if possible.
441 * In both cases, a failure of the ndev call indicates that we should
442 * try again later.
444 if (ethif_can_conf(ethif)) {
445 ethif_gen_mcast(ethif, &ethif->ethif_wanted);
448 * On success, move the wanted configuration into the pending
449 * slot. Otherwise, try again on the next poll iteration.
451 if (ndev_conf(ethif->ethif_ndev, &ethif->ethif_wanted) == OK)
452 ethif_merge_conf(&ethif->ethif_pending,
453 &ethif->ethif_wanted);
454 } else {
455 while (ethif_can_send(ethif)) {
456 pref = *ethif->ethif_snd.es_unsentp;
458 if (pref->type == PBUF_REF)
459 pbuf = (struct pbuf *)pref->payload;
460 else
461 pbuf = pref;
463 if (ndev_send(ethif->ethif_ndev, pbuf) == OK)
464 ethif->ethif_snd.es_unsentp =
465 pchain_end(pref);
466 else
467 break;
472 * Attempt to create additional receive requests for the driver, if
473 * applicable. We currently do not set a limit on the maximum number
474 * of concurrently pending receive requests here, because the maximum
475 * in ndev is already quite low. That may have to be changed one day.
477 while (ethif_can_recv(ethif) && ndev_can_recv(ethif->ethif_ndev)) {
479 * Allocate a buffer for the network device driver to copy the
480 * received packet into. Allocation may fail if no buffers are
481 * available at this time; in that case simply try again later.
482 * We add room for a VLAN tag even though we do not support
483 * such tags just yet.
485 if ((pbuf = pchain_alloc(PBUF_RAW, ETH_PAD_LEN + ETH_HDR_LEN +
486 ETHIF_MAX_MTU + NDEV_ETH_PACKET_TAG)) == NULL)
487 break;
490 * Effectively throw away two bytes in order to align TCP/IP
491 * header fields to 32 bits. See the short discussion in
492 * lwipopts.h as to why we are not using lwIP's ETH_PAD_SIZE.
494 util_pbuf_header(pbuf, -ETH_PAD_LEN);
497 * Send the request to the driver. This may still fail due to
498 * grant allocation failure, in which case we try again later.
500 if (ndev_recv(ethif->ethif_ndev, pbuf) != OK) {
501 pbuf_free(pbuf);
503 break;
507 * Hold on to the packet buffer until the receive request
508 * completes or is aborted, or the driver disappears.
510 *ethif->ethif_rcv.er_tailp = pbuf;
511 ethif->ethif_rcv.er_tailp = pchain_end(pbuf);
516 * Complete the link-layer header of the packet by filling in a source address.
517 * This is relevant for BPF-generated packets only, and thus we can safely
518 * modify the given pbuf.
520 static void
521 ethif_hdrcmplt(struct ifdev * ifdev, struct pbuf * pbuf)
523 struct netif *netif;
525 /* Make sure there is an ethernet packet header at all. */
526 if (pbuf->len < ETH_HDR_LEN)
527 return;
529 netif = ifdev_get_netif(ifdev);
532 * Insert the source ethernet address into the packet. The source
533 * address is located right after the destination address at the start
534 * of the packet.
536 memcpy((uint8_t *)pbuf->payload + netif->hwaddr_len, netif->hwaddr,
537 netif->hwaddr_len);
541 * Return TRUE if the given additional number of spare tokens may be used, or
542 * FALSE if the limit has been reached. Each spare token represents one
543 * enqueued pbuf. The limit must be such that we do not impede normal traffic
544 * but also do not spend the entire buffer pool on enqueued packets.
546 static int
547 ethif_can_spare(unsigned int spares)
549 unsigned int max;
552 * Use the configured maximum, which depends on the current size of the
553 * buffer pool.
555 max = ETHIF_PBUF_MAX_1;
558 * However, limit the total to a value based on the maximum number of
559 * TCP packets that can, in the worst case, be expected to queue up at
560 * any single moment.
562 if (max > ETHIF_PBUF_MAX_2)
563 max = ETHIF_PBUF_MAX_2;
565 return (spares + ethif_spares <= max - ETHIF_PBUF_MIN * NR_NDEV);
569 * Process a packet as output on an ethernet interface.
571 static err_t
572 ethif_output(struct ifdev * ifdev, struct pbuf * pbuf, struct netif * netif)
574 struct ethif *ethif = (struct ethif *)ifdev;
575 struct pbuf *pref, *pcopy;
576 size_t padding;
577 unsigned int count, spares;
579 /* Packets must never be sent on behalf of another interface. */
580 assert(netif == NULL);
583 * The caller already rejects packets while the interface or link is
584 * down. We do want to keep enqueuing packets while the driver is
585 * restarting, so do not check ETHIFF_DISABLED or ETHIFF_FIRST_CONF.
589 * Reject oversized packets immediately. This should not happen.
590 * Undersized packets are padded below.
592 if (pbuf->tot_len > NDEV_ETH_PACKET_MAX) {
593 printf("LWIP: attempt to send oversized ethernet packet "
594 "(size %u)\n", pbuf->tot_len);
595 util_stacktrace();
597 return ERR_MEM;
601 * The original lwIP idea for processing output packets is that we make
602 * a copy of the packet here, so that lwIP is free to do whatever it
603 * wants with the original packet (e.g., keep on the TCP retransmission
604 * queue). More recently, lwIP has made progress towards allowing the
605 * packet to be referenced only, decreasing the reference count only
606 * once the packet has been actually sent. For many embedded systems,
607 * that change now allows zero-copy transmission with direct DMA from
608 * the provided packet buffer. We are not so lucky: we have to make an
609 * additional inter-process copy anyway. We do however use the same
610 * referencing system to avoid having to make yet another copy of the
611 * packet here.
613 * There was previously a check on (pbuf->ref > 1) here, to ensure that
614 * we would never enqueue packets that are retransmitted while we were
615 * still in the process of sending the initial copy. Now that for ARP
616 * and NDP queuing, packets are referenced rather than copied (lwIP
617 * patch #9272), we can no longer perform that check: packets may
618 * legitimately have a reference count of 2 at this point. The second
619 * reference will be dropped by the caller immediately after we return.
623 * There are two cases in which we need to make a copy of the packet
624 * after all:
626 * 1) in the case that the packet needs to be padded in order to reach
627 * the minimum ethernet packet size (for drivers' convenience);
628 * 2) in the (much more exceptional) case that the given pbuf chain
629 * exceeds the maximum vector size for network driver requests.
631 if (NDEV_ETH_PACKET_MIN > pbuf->tot_len)
632 padding = NDEV_ETH_PACKET_MIN - pbuf->tot_len;
633 else
634 padding = 0;
636 count = pbuf_clen(pbuf);
638 if (padding != 0 || count > NDEV_IOV_MAX) {
639 pcopy = pchain_alloc(PBUF_RAW, pbuf->tot_len + padding);
640 if (pcopy == NULL) {
641 ifdev_output_drop(ifdev);
643 return ERR_MEM;
646 if (pbuf_copy(pcopy, pbuf) != ERR_OK)
647 panic("unexpected pbuf copy failure");
649 if (padding > 0) {
651 * This restriction can be lifted if needed, but it
652 * involves hairy pbuf traversal and our standard pool
653 * size should be way in excess of the minimum packet
654 * size.
656 assert(pcopy->len == pbuf->tot_len + padding);
658 memset((char *)pcopy->payload + pbuf->tot_len, 0,
659 padding);
662 count = pbuf_clen(pcopy);
663 assert(count <= NDEV_IOV_MAX);
665 pbuf = pcopy;
666 } else
667 pcopy = NULL;
670 * Restrict the size of the send queue, so that it will not exhaust the
671 * buffer pool.
673 if (ethif->ethif_snd.es_count >= ETHIF_PBUF_MIN)
674 spares = count;
675 else if (ethif->ethif_snd.es_count + count > ETHIF_PBUF_MIN)
676 spares = ethif->ethif_snd.es_count + count - ETHIF_PBUF_MIN;
677 else
678 spares = 0;
680 if (spares > 0 && !ethif_can_spare(spares)) {
681 if (pcopy != NULL)
682 pbuf_free(pcopy);
684 ifdev_output_drop(ifdev);
686 return ERR_MEM;
690 * A side effect of the referencing approach is that we cannot touch
691 * the last pbuf's "next" pointer. Thus, we need another way of
692 * linking together the buffers on the send queue. We use a linked
693 * list of PBUF_REF-type buffers for this instead. However, do this
694 * only when we have not made a copy of the original pbuf, because then
695 * we might as well use the copy instead.
697 if (pcopy == NULL) {
698 if ((pref = pbuf_alloc(PBUF_RAW, 0, PBUF_REF)) == NULL) {
699 ifdev_output_drop(ifdev);
701 return ERR_MEM;
704 pbuf_ref(pbuf);
706 pref->payload = pbuf;
707 pref->tot_len = 0;
708 pref->len = count;
709 } else
710 pref = pcopy;
712 /* If the send queue was empty so far, set the IFF_OACTIVE flag. */
713 if (ethif->ethif_snd.es_head == NULL)
714 ifdev_update_ifflags(&ethif->ethif_ifdev,
715 ifdev_get_ifflags(&ethif->ethif_ifdev) | IFF_OACTIVE);
718 * Enqueue the packet on the send queue. It will be sent from the
719 * polling function as soon as possible. TODO: see if sending it from
720 * here makes any performance difference at all.
722 *ethif->ethif_snd.es_tailp = pref;
723 ethif->ethif_snd.es_tailp = pchain_end(pref);
725 ethif->ethif_snd.es_count += count;
726 ethif_spares += spares;
728 return ERR_OK;
732 * Transmit an ethernet packet on an ethernet interface, as requested by lwIP.
734 static err_t
735 ethif_linkoutput(struct netif * netif, struct pbuf * pbuf)
737 struct ifdev *ifdev = netif_get_ifdev(netif);
740 * Let ifdev make the callback to our output function, so that it can
741 * pass the packet to BPF devices and generically update statistics.
743 return ifdev_output(ifdev, pbuf, NULL /*netif*/, TRUE /*to_bpf*/,
744 TRUE /*hdrcmplt*/);
748 * The multicast address list has changed. See to it that the change will make
749 * it to the network driver at some point.
751 static err_t
752 ethif_set_mcast(struct ethif * ethif)
756 * Simply generate a mode change request, unless the interface is down.
757 * Once the mode change request is about to be sent to the driver, we
758 * will recompute the multicast settings.
760 if (ifdev_is_up(&ethif->ethif_ifdev))
761 ethif->ethif_wanted.nconf_set |= NDEV_SET_MODE;
763 return ERR_OK;
767 * An IPv4 multicast address has been added to or removed from the list of IPv4
768 * multicast addresses.
770 static err_t
771 ethif_set_mcast_v4(struct netif * netif, const ip4_addr_t * group __unused,
772 enum netif_mac_filter_action action __unused)
775 return ethif_set_mcast((struct ethif *)netif_get_ifdev(netif));
779 * An IPv6 multicast address has been added to or removed from the list of IPv6
780 * multicast addresses.
782 static err_t
783 ethif_set_mcast_v6(struct netif * netif, const ip6_addr_t * group __unused,
784 enum netif_mac_filter_action action __unused)
787 return ethif_set_mcast((struct ethif *)netif_get_ifdev(netif));
791 * Initialization function for an ethernet-type netif interface, called from
792 * lwIP at interface creation time.
794 static err_t
795 ethif_init_netif(struct ifdev * ifdev, struct netif * netif)
797 struct ethif *ethif = (struct ethif *)ifdev;
800 * Fill in a dummy name. Since it is only two characters, do not
801 * bother trying to reuse part of the given name. If this name is ever
802 * actually used anywhere, the dummy should suffice for debugging.
804 netif->name[0] = 'e';
805 netif->name[1] = 'n';
807 netif->linkoutput = ethif_linkoutput;
809 memset(netif->hwaddr, 0, sizeof(netif->hwaddr));
812 * Set the netif flags, partially based on the capabilities reported by
813 * the network device driver. The reason that we do this now is that
814 * lwIP tests for some of these flags and starts appropriate submodules
815 * (e.g., IGMP) right after returning from this function. If we set
816 * the flags later, we also have to take over management of those
817 * submodules, which is something we'd rather avoid. For this reason
818 * in particular, we also do not support capability mask changes after
819 * driver restarts - see ethif_enable().
821 netif->flags = NETIF_FLAG_ETHARP | NETIF_FLAG_ETHERNET;
823 if (ethif->ethif_caps & NDEV_CAP_BCAST)
824 netif->flags |= NETIF_FLAG_BROADCAST;
826 if (ethif->ethif_caps & NDEV_CAP_MCAST) {
827 /* The IGMP code adds the all-stations multicast entry. */
828 netif->igmp_mac_filter = ethif_set_mcast_v4;
830 netif->flags |= NETIF_FLAG_IGMP;
832 /* For MLD6 we have to add the all-nodes entry ourselves. */
833 netif->mld_mac_filter = ethif_set_mcast_v6;
835 netif->flags |= NETIF_FLAG_MLD6;
838 return ERR_OK;
842 * The ndev layer reports that a new network device driver has appeared, with
843 * the given ndev identifier, a driver-given name, and a certain set of
844 * capabilities. Create a new ethernet interface object for it. On success,
845 * return a pointer to the object (for later callbacks from ndev). In that
846 * case, the ndev layer will always immediately call ethif_enable() afterwards.
847 * On failure, return NULL, in which case ndev will forget about the driver.
849 struct ethif *
850 ethif_add(ndev_id_t id, const char * name, uint32_t caps)
852 struct ethif *ethif;
853 unsigned int ifflags;
854 int r;
857 * First make sure that the interface name is valid, unique, and not
858 * reserved for virtual interface types.
860 if ((r = ifdev_check_name(name, NULL /*vtype_slot*/)) != OK) {
862 * There is some risk in printing bad stuff, but this may help
863 * in preventing serious driver writer frustration..
865 printf("LWIP: invalid driver name '%s' (%d)\n", name, r);
867 return NULL;
870 /* Then see if there is a free ethernet interface object available. */
871 if (SIMPLEQ_EMPTY(&ethif_freelist)) {
872 printf("LWIP: out of slots for driver name '%s'\n", name);
874 return NULL;
878 * All good; set up the interface. First initialize the object, since
879 * adding the interface to lwIP might spawn some activity right away.
881 ethif = SIMPLEQ_FIRST(&ethif_freelist);
882 SIMPLEQ_REMOVE_HEAD(&ethif_freelist, ethif_next);
884 /* Initialize the ethif structure. */
885 memset(ethif, 0, sizeof(*ethif));
886 ethif->ethif_ndev = id;
887 ethif->ethif_flags = ETHIFF_DISABLED;
888 ethif->ethif_caps = caps;
890 ethif->ethif_snd.es_head = NULL;
891 ethif->ethif_snd.es_unsentp = &ethif->ethif_snd.es_head;
892 ethif->ethif_snd.es_tailp = &ethif->ethif_snd.es_head;
893 ethif->ethif_snd.es_count = 0;
895 ethif->ethif_rcv.er_head = NULL;
896 ethif->ethif_rcv.er_tailp = &ethif->ethif_rcv.er_head;
899 * Set all the three configurations to the same initial values. Since
900 * any change to the configuration will go through all three, this
901 * allows us to obtain various parts of the status (in particular, the
902 * mode, flags, enabled capabilities, and media type selection) from
903 * any of the three without having to consult the others. Note that
904 * the hardware address is set to a indeterminate initial value, as it
905 * is left to the network driver unless specifically overridden.
907 ethif->ethif_active.nconf_set = 0;
908 ethif->ethif_active.nconf_mode = NDEV_MODE_DOWN;
909 ethif->ethif_active.nconf_flags = 0;
910 ethif->ethif_active.nconf_caps = 0;
911 ethif->ethif_active.nconf_media =
912 IFM_MAKEWORD(IFM_ETHER, IFM_AUTO, 0, 0);
913 memcpy(&ethif->ethif_pending, &ethif->ethif_active,
914 sizeof(ethif->ethif_pending));
915 memcpy(&ethif->ethif_wanted, &ethif->ethif_pending,
916 sizeof(ethif->ethif_wanted));
919 * Compute the initial NetBSD-style interface flags. The IFF_SIMPLEX
920 * interface flag is always enabled because we do not support network
921 * drivers that are receiving their own packets. In particular, lwIP
922 * currently does not deal well with receiving back its own multicast
923 * packets, which leads to IPv6 DAD failures. The other two flags
924 * (IFF_BROADCAST, IFF_MULTICAST) denote capabilities, not enabled
925 * receipt modes.
927 ifflags = IFF_SIMPLEX;
928 if (caps & NDEV_CAP_BCAST)
929 ifflags |= IFF_BROADCAST;
930 if (caps & NDEV_CAP_MCAST)
931 ifflags |= IFF_MULTICAST;
933 /* Finally, add the interface to ifdev and lwIP. This cannot fail. */
934 ifdev_add(&ethif->ethif_ifdev, name, ifflags, IFT_ETHER, ETH_HDR_LEN,
935 ETHARP_HWADDR_LEN, DLT_EN10MB, ETHIF_DEF_MTU,
936 ND6_IFF_PERFORMNUD | ND6_IFF_AUTO_LINKLOCAL, &ethif_ops);
938 return ethif;
942 * The link status and/or media type of an ethernet interface has changed.
944 static void
945 ethif_set_status(struct ethif * ethif, uint32_t link, uint32_t media)
947 unsigned int iflink;
949 /* We save the media type locally for now. */
950 ethif->ethif_media = media;
952 /* Let the ifdev module handle the details of the link change. */
953 switch (link) {
954 case NDEV_LINK_UP: iflink = LINK_STATE_UP; break;
955 case NDEV_LINK_DOWN: iflink = LINK_STATE_DOWN; break;
956 default: iflink = LINK_STATE_UNKNOWN; break;
959 ifdev_update_link(&ethif->ethif_ifdev, iflink);
963 * The ndev layer reports that a previously added or disabled network device
964 * driver has been (re)enabled. Start by initializing the driver. Return TRUE
965 * if the interface could indeed be enabled, or FALSE if it should be forgotten
966 * altogether after all.
969 ethif_enable(struct ethif * ethif, const char * name,
970 const struct ndev_hwaddr * hwaddr, uint8_t hwaddr_len, uint32_t caps,
971 uint32_t link, uint32_t media)
973 int r;
975 assert(ethif->ethif_flags & ETHIFF_DISABLED);
978 * One disadvantage of keeping service labels and ethernet driver names
979 * disjunct is that the ethernet driver may mess with its name between
980 * restarts. Ultimately we may end up renaming our ethernet drivers
981 * such that their labels match their names, in which case we no longer
982 * need the drivers themselves to produce a name, and we can retire
983 * this check.
985 if (name != NULL && strcmp(ethif_get_name(ethif), name)) {
986 printf("LWIP: driver '%s' restarted with name '%s'\n",
987 ethif_get_name(ethif), name);
989 return FALSE;
993 * The hardware address length is just a sanity check for now. After
994 * the initialization reply, we assume the same length is used for all
995 * addresses, which is also the maximum, namely 48 bits (six bytes).
997 if (hwaddr_len != ETHARP_HWADDR_LEN) {
998 printf("LWIP: driver '%s' reports hwaddr length %u\n",
999 ethif_get_name(ethif), hwaddr_len);
1001 return FALSE;
1005 * If the driver has changed its available capabilities as a result of
1006 * a restart, we have a problem: we may already have configured the
1007 * interface's netif object to make use of of some of those
1008 * capabilities. TODO: we can deal with some cases (e.g., disappearing
1009 * checksum offloading capabilities) with some effort, and with other
1010 * cases (e.g., disappearing multicast support) with a LOT more effort.
1012 if (ethif->ethif_caps != caps) {
1013 printf("LWIP: driver '%s' changed capabilities\n",
1014 ethif_get_name(ethif));
1016 return FALSE;
1020 * Set the hardware address on the interface, unless a request is
1021 * currently pending to change it, in which case the new address has
1022 * been set already and we do not want to revert that change. If not,
1023 * we always set the address, because it may have changed as part of a
1024 * driver restart and we do not want to get out of sync with it, nor
1025 * can we necessarily change it back.
1027 if (!(ethif->ethif_active.nconf_set & NDEV_SET_HWADDR) &&
1028 !(ethif->ethif_pending.nconf_set & NDEV_SET_HWADDR))
1029 ifdev_update_hwaddr(&ethif->ethif_ifdev, hwaddr->nhwa_addr,
1030 (name == NULL) /*is_factory*/);
1033 * At this point, only one more thing can fail: it is possible that we
1034 * do not manage to send the first configuration request due to memory
1035 * shortage. This is extremely unlikely to happen, so send the conf
1036 * request first and forget the entire driver if it fails.
1039 * Always generate a new multicast list before sending a configuration
1040 * request, and at no other time (since there may be a grant for it).
1042 ethif_gen_mcast(ethif, &ethif->ethif_active);
1044 if ((r = ndev_conf(ethif->ethif_ndev, &ethif->ethif_active)) != OK) {
1045 printf("LWIP: sending first configuration to '%s' failed "
1046 "(%d)\n", ethif_get_name(ethif), r);
1048 return FALSE;
1051 ethif_set_status(ethif, link, media);
1053 ethif->ethif_flags &= ~ETHIFF_DISABLED;
1054 ethif->ethif_flags |= ETHIFF_FIRST_CONF;
1056 return TRUE;
1060 * The configuration change stored in the "pending" slot of the given ethif
1061 * object has been acknowledged by the network device driver (or the driver has
1062 * died, see ethif_disable()). Apply changes to the "active" slot of the given
1063 * ethif object, as well as previously delayed changes to lwIP through netif.
1065 static void
1066 ethif_post_conf(struct ethif * ethif)
1068 struct ndev_conf *nconf;
1069 unsigned int flags;
1071 nconf = &ethif->ethif_pending;
1074 * Now that the driver configuration has changed, we know that the
1075 * new checksum settings will be applied to all sent and received
1076 * packets, and we can disable checksumming flags in netif as desired.
1077 * Enabling checksumming flags has already been done earlier on.
1079 if (nconf->nconf_set & NDEV_SET_CAPS) {
1080 flags = ethif_get_netif(ethif)->chksum_flags;
1082 if (nconf->nconf_caps & NDEV_CAP_CS_IP4_TX)
1083 flags &= ~NETIF_CHECKSUM_GEN_IP;
1084 if (nconf->nconf_caps & NDEV_CAP_CS_IP4_RX)
1085 flags &= ~NETIF_CHECKSUM_CHECK_IP;
1086 if (nconf->nconf_caps & NDEV_CAP_CS_UDP_TX)
1087 flags &= ~NETIF_CHECKSUM_GEN_UDP;
1088 if (nconf->nconf_caps & NDEV_CAP_CS_UDP_RX)
1089 flags &= ~NETIF_CHECKSUM_CHECK_UDP;
1090 if (nconf->nconf_caps & NDEV_CAP_CS_TCP_TX)
1091 flags &= ~NETIF_CHECKSUM_GEN_TCP;
1092 if (nconf->nconf_caps & NDEV_CAP_CS_TCP_RX)
1093 flags &= ~NETIF_CHECKSUM_CHECK_TCP;
1095 NETIF_SET_CHECKSUM_CTRL(ethif_get_netif(ethif), flags);
1099 * Merge any individual parts of the now acknowledged configuration
1100 * changes into the active configuration. The result is that we are
1101 * able to reapply these changes at any time should the network driver
1102 * be restarted. In addition, by only setting bits for fields that
1103 * have actually changed, we can later tell whether the user wanted the
1104 * change or ethif should just take over what the driver reports after
1105 * a restart; this is important for HW-address and media settings.
1107 ethif_merge_conf(&ethif->ethif_active, &ethif->ethif_pending);
1111 * All receive requests have been canceled at the ndev layer, because the
1112 * network device driver has been restarted or shut down. Clear the receive
1113 * queue, freeing any packets in it.
1115 static void
1116 ethif_drain(struct ethif * ethif)
1118 struct pbuf *pbuf, **pnext;
1120 while ((pbuf = ethif->ethif_rcv.er_head) != NULL) {
1121 pnext = pchain_end(pbuf);
1123 if ((ethif->ethif_rcv.er_head = *pnext) == NULL)
1124 ethif->ethif_rcv.er_tailp = &ethif->ethif_rcv.er_head;
1126 *pnext = NULL;
1127 pbuf_free(pbuf);
1132 * The network device driver has stopped working (i.e., crashed), but has not
1133 * been shut down completely, and is expect to come back later.
1135 void
1136 ethif_disable(struct ethif * ethif)
1140 * We assume, optimistically, that a new instance of the driver will be
1141 * brought up soon after which we can continue operating as before. As
1142 * such, we do not want to change most of the user-visible state until
1143 * we know for sure that our optimism was in vain. In particular, we
1144 * do *not* want to change the following parts of the state here:
1146 * - the contents of the send queue;
1147 * - the state of the interface (up or down);
1148 * - the state and media type of the physical link.
1150 * The main user-visible indication of the crash will be that the
1151 * interface does not have the IFF_RUNNING flag set.
1155 * If a configuration request was pending, it will be lost now. Highly
1156 * unintuitively, make the requested configuration the *active* one,
1157 * just as though the request completed successfully. This works,
1158 * because once the driver comes back, the active configuration will be
1159 * replayed as initial configuration. Therefore, by pretending that
1160 * the current request went through, we ensure that it too will be sent
1161 * to the new instance--before anything else is allowed to happen.
1163 if (ethif->ethif_pending.nconf_set != 0)
1164 ethif_post_conf(ethif);
1167 * Any packet send requests have been lost, too, and likewise forgotten
1168 * by ndev. Thus, we need to forget that we sent any packets, so that
1169 * they will be resent after the driver comes back up. That *may*
1170 * cause packet duplication, but that is preferable over packet loss.
1172 ethif->ethif_snd.es_unsentp = &ethif->ethif_snd.es_head;
1175 * We fully restart the receive queue, because all receive requests
1176 * have been forgotten by ndev as well now and it is easier to simply
1177 * reconstruct the receive queue in its entirety later on.
1179 ethif_drain(ethif);
1181 /* Make sure we do not attempt to initiate new requests for now. */
1182 ethif->ethif_flags &= ~ETHIFF_FIRST_CONF;
1183 ethif->ethif_flags |= ETHIFF_DISABLED;
1187 * Dequeue and discard the packet at the head of the send queue.
1189 static void
1190 ethif_dequeue_send(struct ethif * ethif)
1192 struct pbuf *pref, *pbuf, **pnext;
1193 unsigned int count, spares;
1196 * The send queue is a linked list of reference buffers, each of which
1197 * links to the actual packet. Dequeue the first reference buffer.
1199 pref = ethif->ethif_snd.es_head;
1200 assert(pref != NULL);
1202 pnext = pchain_end(pref);
1204 if (ethif->ethif_snd.es_unsentp == pnext)
1205 ethif->ethif_snd.es_unsentp = &ethif->ethif_snd.es_head;
1207 if ((ethif->ethif_snd.es_head = *pnext) == NULL)
1208 ethif->ethif_snd.es_tailp = &ethif->ethif_snd.es_head;
1210 /* Do this before possibly calling pbuf_clen() below.. */
1211 *pnext = NULL;
1214 * If we never made a copy of the original packet, we now have it
1215 * pointed to by a reference buffer. If so, decrease the reference
1216 * count of the actual packet, thereby freeing it if lwIP itself was
1217 * already done with. Otherwise, the copy of the packet is the
1218 * reference buffer itself. In both cases we need to free that buffer.
1220 if (pref->type == PBUF_REF) {
1221 pbuf = (struct pbuf *)pref->payload;
1223 pbuf_free(pbuf);
1225 count = pref->len;
1226 } else
1227 count = pbuf_clen(pref);
1229 assert(count > 0);
1230 assert(ethif->ethif_snd.es_count >= count);
1231 ethif->ethif_snd.es_count -= count;
1233 if (ethif->ethif_snd.es_count >= ETHIF_PBUF_MIN)
1234 spares = count;
1235 else if (ethif->ethif_snd.es_count + count > ETHIF_PBUF_MIN)
1236 spares = ethif->ethif_snd.es_count + count - ETHIF_PBUF_MIN;
1237 else
1238 spares = 0;
1240 assert(ethif_spares >= spares);
1241 ethif_spares -= spares;
1243 /* Free the reference buffer as well. */
1244 pbuf_free(pref);
1246 /* If the send queue is now empty, clear the IFF_OACTIVE flag. */
1247 if (ethif->ethif_snd.es_head == NULL)
1248 ifdev_update_ifflags(&ethif->ethif_ifdev,
1249 ifdev_get_ifflags(&ethif->ethif_ifdev) & ~IFF_OACTIVE);
1253 * The ndev layer reports that a network device driver has been permanently
1254 * shut down. Remove the corresponding ethernet interface from the system.
1256 void
1257 ethif_remove(struct ethif * ethif)
1259 int r;
1261 /* Clear the send and receive queues. */
1262 while (ethif->ethif_snd.es_head != NULL)
1263 ethif_dequeue_send(ethif);
1265 ethif_drain(ethif);
1267 /* Let the ifdev module deal with most other removal aspects. */
1268 if ((r = ifdev_remove(&ethif->ethif_ifdev)) != OK)
1269 panic("unable to remove ethernet interface: %d", r);
1271 /* Finally, readd the ethif object to the free list. */
1272 SIMPLEQ_INSERT_HEAD(&ethif_freelist, ethif, ethif_next);
1276 * The ndev layer reports that the (oldest) pending configuration request has
1277 * completed with the given result.
1279 void
1280 ethif_configured(struct ethif * ethif, int32_t result)
1284 * The driver is not supposed to return failure in response to a
1285 * configure result. If it does, we have no proper way to recover, as
1286 * we may already have applied part of the new configuration to netif.
1287 * For now, just report failure and then pretend success.
1289 if (result < 0) {
1290 printf("LWIP: driver '%s' replied with conf result %d\n",
1291 ethif_get_name(ethif), result);
1293 result = 0;
1296 if (ethif->ethif_flags & ETHIFF_FIRST_CONF)
1297 ethif->ethif_flags &= ~ETHIFF_FIRST_CONF;
1298 else
1299 ethif_post_conf(ethif);
1302 * For now, the result is simply a boolean value indicating whether the
1303 * driver is using the all-multicast receive mode instead of the
1304 * multicast-list receive mode. We can turn it into a bitmap later.
1306 if (result != 0) {
1307 ethif->ethif_active.nconf_mode &= ~NDEV_MODE_MCAST_LIST;
1308 ethif->ethif_active.nconf_mode |= NDEV_MODE_MCAST_ALL;
1311 /* The interface flags may have changed now, so update them. */
1312 ethif_update_ifflags(ethif);
1314 /* Regular operation will resume from the polling function. */
1318 * The ndev layer reports that the first packet on the send queue has been
1319 * successfully transmitted with 'result' set to OK, or dropped if 'result' is
1320 * negative. The latter may happen if the interface was taken down while there
1321 * were still packets in transit.
1323 void
1324 ethif_sent(struct ethif * ethif, int32_t result)
1327 ethif_dequeue_send(ethif);
1329 if (result < 0)
1330 ifdev_output_drop(&ethif->ethif_ifdev);
1332 /* More requests may be sent from the polling function now. */
1336 * The ndev layer reports that the first buffer on the receive queue has been
1337 * filled with a packet of 'result' bytes, or if 'result' is negative, the
1338 * receive request has been aborted.
1340 void
1341 ethif_received(struct ethif * ethif, int32_t result)
1343 struct pbuf *pbuf, *pwalk, **pnext;
1344 size_t left;
1347 * Start by removing the first buffer chain off the receive queue. The
1348 * ndev layer guarantees that there ever was a receive request at all.
1350 if ((pbuf = ethif->ethif_rcv.er_head) == NULL)
1351 panic("driver received packet but queue empty");
1353 pnext = pchain_end(pbuf);
1355 if ((ethif->ethif_rcv.er_head = *pnext) == NULL)
1356 ethif->ethif_rcv.er_tailp = &ethif->ethif_rcv.er_head;
1357 *pnext = NULL;
1359 /* Decide if we can and should deliver a packet to the layers above. */
1360 if (result <= 0 || !ethif_can_recv(ethif)) {
1361 pbuf_free(pbuf);
1363 return;
1366 if (result > pbuf->tot_len) {
1367 printf("LWIP: driver '%s' returned bad packet size (%zd)\n",
1368 ethif_get_name(ethif), (ssize_t)result);
1370 pbuf_free(pbuf);
1372 return;
1376 * The packet often does not use all of the buffers, or at least not
1377 * all of the last buffer. Adjust lengths for the buffers that contain
1378 * part of the packet, and free the remaining (unused) buffers, if any.
1380 left = (size_t)result;
1382 for (pwalk = pbuf; ; pwalk = pwalk->next) {
1383 pwalk->tot_len = left;
1384 if (pwalk->len > left)
1385 pwalk->len = left;
1386 left -= pwalk->len;
1387 if (left == 0)
1388 break;
1391 if (pwalk->next != NULL) {
1392 pbuf_free(pwalk->next);
1394 pwalk->next = NULL;
1398 * Finally, hand off the packet to the layers above. We go through
1399 * ifdev so that it can pass the packet to BPF devices and update
1400 * statistics and all that.
1402 ifdev_input(&ethif->ethif_ifdev, pbuf, NULL /*netif*/,
1403 TRUE /*to_bpf*/);
1407 * The ndev layer reports a network driver status update. If anything has
1408 * changed since the last status, we may have to take action. The given
1409 * statistics counters are relative to the previous status report.
1411 void
1412 ethif_status(struct ethif * ethif, uint32_t link, uint32_t media,
1413 uint32_t oerror, uint32_t coll, uint32_t ierror, uint32_t iqdrop)
1415 struct if_data *ifdata;
1417 ethif_set_status(ethif, link, media);
1419 ifdata = ifdev_get_ifdata(&ethif->ethif_ifdev);
1420 ifdata->ifi_oerrors += oerror;
1421 ifdata->ifi_collisions += coll;
1422 ifdata->ifi_ierrors += ierror;
1423 ifdata->ifi_iqdrops += iqdrop;
1427 * Set NetBSD-style interface flags (IFF_) for an ethernet interface.
1429 static int
1430 ethif_set_ifflags(struct ifdev * ifdev, unsigned int ifflags)
1432 struct ethif *ethif = (struct ethif *)ifdev;
1433 uint32_t mode, flags;
1436 * We do not support IFF_NOARP at this time, because lwIP does not: the
1437 * idea of IFF_NOARP is that only static ARP entries are used, but lwIP
1438 * does not support separating static from dynamic ARP operation. The
1439 * flag does not appear to be particularly widely used anyway.
1441 if ((ifflags & ~(IFF_UP | IFF_DEBUG | IFF_LINK0 | IFF_LINK1 |
1442 IFF_LINK2)) != 0)
1443 return EINVAL;
1445 mode = ethif->ethif_wanted.nconf_mode;
1446 if ((ifflags & IFF_UP) && mode == NDEV_MODE_DOWN) {
1447 mode = NDEV_MODE_UP;
1449 /* Always enable broadcast receipt when supported. */
1450 if (ethif->ethif_caps & NDEV_CAP_BCAST)
1451 mode |= NDEV_MODE_BCAST;
1453 if (ifdev_is_promisc(ifdev))
1454 mode |= NDEV_MODE_PROMISC;
1457 * The multicast flags will be set right before we send the
1458 * request to the driver.
1460 } else if (!(ifflags & IFF_UP) && mode != NDEV_MODE_DOWN)
1461 ethif->ethif_wanted.nconf_mode = NDEV_MODE_DOWN;
1463 if (mode != ethif->ethif_wanted.nconf_mode) {
1464 ethif->ethif_wanted.nconf_mode = mode;
1465 ethif->ethif_wanted.nconf_set |= NDEV_SET_MODE;
1469 * Some of the interface flags (UP, DEBUG, PROMISC, LINK[0-2]) are a
1470 * reflection of the intended state as set by userland before, so that
1471 * a userland utility will never not see the flag it just set (or the
1472 * other way around). These flags therefore do not necessarily reflect
1473 * what is actually going on at that moment. We cannot have both.
1475 flags = 0;
1476 if (ifflags & IFF_DEBUG)
1477 flags |= NDEV_FLAG_DEBUG;
1478 if (ifflags & IFF_LINK0)
1479 flags |= NDEV_FLAG_LINK0;
1480 if (ifflags & IFF_LINK1)
1481 flags |= NDEV_FLAG_LINK1;
1482 if (ifflags & IFF_LINK2)
1483 flags |= NDEV_FLAG_LINK2;
1485 if (flags != ethif->ethif_wanted.nconf_flags) {
1486 ethif->ethif_wanted.nconf_flags = flags;
1487 ethif->ethif_wanted.nconf_set |= NDEV_SET_FLAGS;
1490 /* The changes will be picked up from the polling function. */
1491 return OK;
1495 * Convert a bitmask of ndev-layer capabilities (NDEV_CAP_) to NetBSD-style
1496 * interface capabilities (IFCAP_).
1498 static uint64_t
1499 ethif_cap_to_ifcap(uint32_t caps)
1501 uint64_t ifcap;
1503 ifcap = 0;
1504 if (caps & NDEV_CAP_CS_IP4_TX)
1505 ifcap |= IFCAP_CSUM_IPv4_Tx;
1506 if (caps & NDEV_CAP_CS_IP4_RX)
1507 ifcap |= IFCAP_CSUM_IPv4_Rx;
1508 if (caps & NDEV_CAP_CS_UDP_TX)
1509 ifcap |= IFCAP_CSUM_UDPv4_Tx | IFCAP_CSUM_UDPv6_Tx;
1510 if (caps & NDEV_CAP_CS_UDP_RX)
1511 ifcap |= IFCAP_CSUM_UDPv4_Rx | IFCAP_CSUM_UDPv6_Rx;
1512 if (caps & NDEV_CAP_CS_TCP_TX)
1513 ifcap |= IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_TCPv6_Tx;
1514 if (caps & NDEV_CAP_CS_TCP_RX)
1515 ifcap |= IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_TCPv6_Rx;
1517 return ifcap;
1521 * Retrieve potential and enabled NetBSD-style interface capabilities (IFCAP_).
1523 static void
1524 ethif_get_ifcap(struct ifdev * ifdev, uint64_t * ifcap, uint64_t * ifena)
1526 struct ethif *ethif = (struct ethif *)ifdev;
1528 *ifcap = ethif_cap_to_ifcap(ethif->ethif_caps);
1529 *ifena = ethif_cap_to_ifcap(ethif->ethif_wanted.nconf_caps);
1533 * Set NetBSD-style enabled interface capabilities (IFCAP_).
1535 static int
1536 ethif_set_ifcap(struct ifdev * ifdev, uint64_t ifcap)
1538 struct ethif *ethif = (struct ethif *)ifdev;
1539 unsigned int flags;
1540 uint32_t caps;
1542 if (ifcap & ~(IFCAP_CSUM_IPv4_Tx | IFCAP_CSUM_IPv4_Rx |
1543 IFCAP_CSUM_UDPv4_Tx | IFCAP_CSUM_UDPv6_Tx |
1544 IFCAP_CSUM_UDPv4_Rx | IFCAP_CSUM_UDPv6_Rx |
1545 IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_TCPv6_Tx |
1546 IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_TCPv6_Rx))
1547 return EINVAL;
1550 * Some IPv4/IPv6 flags need to be set together in order to be picked
1551 * up. Unfortunately, that is all we can do given that lwIP does not
1552 * distinguish IPv4/IPv6 when it comes to TCP/UDP checksum flags.
1554 caps = 0;
1555 if (ifcap & IFCAP_CSUM_IPv4_Tx)
1556 caps |= NDEV_CAP_CS_IP4_TX;
1557 if (ifcap & IFCAP_CSUM_IPv4_Rx)
1558 caps |= NDEV_CAP_CS_IP4_RX;
1559 if ((ifcap & (IFCAP_CSUM_UDPv4_Tx | IFCAP_CSUM_UDPv6_Tx)) ==
1560 (IFCAP_CSUM_UDPv4_Tx | IFCAP_CSUM_UDPv6_Tx))
1561 caps |= NDEV_CAP_CS_UDP_TX;
1562 if ((ifcap & (IFCAP_CSUM_UDPv4_Rx | IFCAP_CSUM_UDPv6_Rx)) ==
1563 (IFCAP_CSUM_UDPv4_Rx | IFCAP_CSUM_UDPv6_Rx))
1564 caps |= NDEV_CAP_CS_UDP_RX;
1565 if ((ifcap & (IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_TCPv6_Tx)) ==
1566 (IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_TCPv6_Tx))
1567 caps |= NDEV_CAP_CS_TCP_TX;
1568 if ((ifcap & (IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_TCPv6_Rx)) ==
1569 (IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_TCPv6_Rx))
1570 caps |= NDEV_CAP_CS_TCP_RX;
1573 * When changing checksumming capabilities, we have to make sure that
1574 * we only ever checksum too much and never too little. This means
1575 * that we enable any checksum options in netif here, and disable any
1576 * checksum options in netif only after driver configuration.
1578 * Note that we have to draw the line somewhere with this kind of
1579 * self-protection, and that line is short of TCP retransmission: we
1580 * see it as lwIP's job to compute checksums for retransmitted TCP
1581 * packets if they were saved across checksum changes. Even though
1582 * lwIP may not care, there is little we can do about that anyway.
1584 if (ethif->ethif_wanted.nconf_caps != caps) {
1585 flags = ethif_get_netif(ethif)->chksum_flags;
1587 if (!(caps & NDEV_CAP_CS_IP4_TX))
1588 flags |= NETIF_CHECKSUM_GEN_IP;
1589 if (!(caps & NDEV_CAP_CS_IP4_RX))
1590 flags |= NETIF_CHECKSUM_CHECK_IP;
1591 if (!(caps & NDEV_CAP_CS_UDP_TX))
1592 flags |= NETIF_CHECKSUM_GEN_UDP;
1593 if (!(caps & NDEV_CAP_CS_UDP_RX))
1594 flags |= NETIF_CHECKSUM_CHECK_UDP;
1595 if (!(caps & NDEV_CAP_CS_TCP_TX))
1596 flags |= NETIF_CHECKSUM_GEN_TCP;
1597 if (!(caps & NDEV_CAP_CS_TCP_RX))
1598 flags |= NETIF_CHECKSUM_CHECK_TCP;
1600 NETIF_SET_CHECKSUM_CTRL(ethif_get_netif(ethif), flags);
1602 ethif->ethif_wanted.nconf_caps = caps;
1603 ethif->ethif_wanted.nconf_set |= NDEV_SET_CAPS;
1606 /* The changes will be picked up from the polling function. */
1607 return OK;
1611 * Retrieve NetBSD-style interface media type (IFM_). Return both the current
1612 * media type selection and the driver-reported active media type.
1614 static void
1615 ethif_get_ifmedia(struct ifdev * ifdev, int * ifcurrent, int * ifactive)
1617 struct ethif *ethif = (struct ethif *)ifdev;
1620 * For the current select, report back whatever the user gave us, even
1621 * if it has not reached the driver at all yet.
1623 *ifcurrent = (int)ethif->ethif_wanted.nconf_media;
1624 *ifactive = (int)ethif->ethif_media;
1628 * Set current NetBSD-style interface media type (IFM_).
1630 static int
1631 ethif_set_ifmedia(struct ifdev * ifdev, int ifmedia)
1633 struct ethif *ethif = (struct ethif *)ifdev;
1636 * We currently completely lack the infrastructure to suspend the
1637 * current IOCTL call until the driver replies (or disappears).
1638 * Therefore we have no choice but to return success here, even if the
1639 * driver cannot accept the change. The driver does notify us of media
1640 * changes, so the user may observe the new active media type later.
1641 * Also note that the new media type may not be the requested type,
1642 * which is why we do not perform any checks against the wanted or
1643 * active media types.
1645 ethif->ethif_wanted.nconf_media = (uint32_t)ifmedia;
1646 ethif->ethif_wanted.nconf_set |= NDEV_SET_MEDIA;
1648 /* The change will be picked up from the polling function. */
1649 return OK;
1653 * Enable or disable promiscuous mode on the interface.
1655 static void
1656 ethif_set_promisc(struct ifdev * ifdev, int promisc)
1658 struct ethif *ethif = (struct ethif *)ifdev;
1660 if (ethif->ethif_wanted.nconf_mode != NDEV_MODE_DOWN) {
1661 if (promisc)
1662 ethif->ethif_wanted.nconf_mode |= NDEV_MODE_PROMISC;
1663 else
1664 ethif->ethif_wanted.nconf_mode &= ~NDEV_MODE_PROMISC;
1665 ethif->ethif_wanted.nconf_set |= NDEV_SET_MODE;
1668 /* The change will be picked up from the polling function. */
1672 * Set the hardware address on the interface.
1674 static int
1675 ethif_set_hwaddr(struct ifdev * ifdev, const uint8_t * hwaddr)
1677 struct ethif *ethif = (struct ethif *)ifdev;
1679 if (!(ethif->ethif_caps & NDEV_CAP_HWADDR))
1680 return EINVAL;
1682 memcpy(&ethif->ethif_wanted.nconf_hwaddr.nhwa_addr, hwaddr,
1683 ETHARP_HWADDR_LEN);
1684 ethif->ethif_wanted.nconf_set |= NDEV_SET_HWADDR;
1686 /* The change will be picked up from the polling function. */
1687 return OK;
1691 * Set the Maximum Transmission Unit for this interface. Return TRUE if the
1692 * new value is acceptable, in which case the caller will do the rest. Return
1693 * FALSE otherwise.
1695 static int
1696 ethif_set_mtu(struct ifdev * ifdev __unused, unsigned int mtu)
1699 return (mtu <= ETHIF_MAX_MTU);
1702 static const struct ifdev_ops ethif_ops = {
1703 .iop_init = ethif_init_netif,
1704 .iop_input = netif_input,
1705 .iop_output = ethif_output,
1706 .iop_output_v4 = etharp_output,
1707 .iop_output_v6 = ethip6_output,
1708 .iop_hdrcmplt = ethif_hdrcmplt,
1709 .iop_poll = ethif_poll,
1710 .iop_set_ifflags = ethif_set_ifflags,
1711 .iop_get_ifcap = ethif_get_ifcap,
1712 .iop_set_ifcap = ethif_set_ifcap,
1713 .iop_get_ifmedia = ethif_get_ifmedia,
1714 .iop_set_ifmedia = ethif_set_ifmedia,
1715 .iop_set_promisc = ethif_set_promisc,
1716 .iop_set_hwaddr = ethif_set_hwaddr,
1717 .iop_set_mtu = ethif_set_mtu,