1 /* $NetBSD: if_gre.c,v 1.140 2009/04/28 23:05:25 dyoung Exp $ */
4 * Copyright (c) 1998, 2008 The NetBSD Foundation, Inc.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Heiko W.Rupp <hwr@pilhuhn.de>
10 * IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de>
12 * GRE over UDP/IPv4/IPv6 sockets contributed by David Young <dyoung@NetBSD.org>
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
23 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
24 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
25 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
27 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
35 * This material is based upon work partially supported by NSF
36 * under Contract No. NSF CNS-0626584.
40 * Encapsulate L3 protocols into IP
41 * See RFC 1701 and 1702 for more details.
42 * If_gre is compatible with Cisco GRE tunnels, so you can
43 * have a NetBSD box as the other end of a tunnel interface of a Cisco
44 * router. See gre(4) for more details.
47 #include <sys/cdefs.h>
48 __KERNEL_RCSID(0, "$NetBSD: if_gre.c,v 1.140 2009/04/28 23:05:25 dyoung Exp $");
50 #include "opt_atalk.h"
55 #include <sys/param.h>
57 #include <sys/filedesc.h>
58 #include <sys/malloc.h>
59 #include <sys/mallocvar.h>
62 #include <sys/domain.h>
63 #include <sys/protosw.h>
64 #include <sys/socket.h>
65 #include <sys/socketvar.h>
66 #include <sys/ioctl.h>
67 #include <sys/queue.h>
69 #include <sys/systm.h>
70 #include <sys/sysctl.h>
71 #include <sys/kauth.h>
73 #include <sys/kernel.h>
74 #include <sys/mutex.h>
75 #include <sys/condvar.h>
76 #include <sys/kthread.h>
80 #include <net/ethertypes.h>
82 #include <net/if_types.h>
83 #include <net/netisr.h>
84 #include <net/route.h>
86 #include <netinet/in_systm.h>
87 #include <netinet/in.h>
88 #include <netinet/ip.h> /* we always need this for sizeof(struct ip) */
91 #include <netinet/in_var.h>
92 #include <netinet/ip_var.h>
96 #include <netinet6/in6_var.h>
100 #include <netatalk/at.h>
101 #include <netatalk/at_var.h>
102 #include <netatalk/at_extern.h>
106 #include <sys/time.h>
110 #include <net/if_gre.h>
112 #include <compat/sys/socket.h>
113 #include <compat/sys/sockio.h>
115 * It is not easy to calculate the right value for a GRE MTU.
116 * We leave this task to the admin and use the same default that
/* GRE_DPRINTF(sc, fmt, ...): debug trace macro.  The first variant prints
 * the calling function name and line number followed by the formatted
 * message when gre_debug is set or the interface has IFF_DEBUG set; the
 * second variant expands to an empty statement.
 * NOTE(review): the preprocessor lines selecting between the two variants
 * are not present in this listing; the trailing #endif suggests the switch
 * is GRE_DEBUG - confirm against the real file.
 */
123 #define GRE_DPRINTF(__sc, ...) \
125 if (__predict_false(gre_debug || \
126 ((__sc)->sc_if.if_flags & IFF_DEBUG) != 0)) { \
127 printf("%s.%d: ", __func__, __LINE__); \
128 printf(__VA_ARGS__); \
130 } while (/*CONSTCOND*/0)
132 #define GRE_DPRINTF(__sc, __fmt, ...) do { } while (/*CONSTCOND*/0)
133 #endif /* GRE_DEBUG */
/* File-scope state and forward declarations:
 *  - ip_gre_ttl, initialised to GRE_TTL (gre_socreate passes it as the
 *    IP_TTL socket option value);
 *  - the M_GRE_BUFQ malloc type used for the send ring;
 *  - the "gre" interface cloner, wired to gre_clone_create and
 *    gre_clone_destroy;
 *  - prototypes for the static helpers defined later in the file.
 */
135 int ip_gre_ttl
= GRE_TTL
;
136 MALLOC_DEFINE(M_GRE_BUFQ
, "gre_bufq", "gre mbuf queue");
138 static int gre_clone_create(struct if_clone
*, int);
139 static int gre_clone_destroy(struct ifnet
*);
141 static struct if_clone gre_cloner
=
142 IF_CLONE_INITIALIZER("gre", gre_clone_create
, gre_clone_destroy
);
144 static int gre_input(struct gre_softc
*, struct mbuf
*, int,
145 const struct gre_h
*);
146 static bool gre_is_nullconf(const struct gre_soparm
*);
147 static int gre_output(struct ifnet
*, struct mbuf
*,
148 const struct sockaddr
*, struct rtentry
*);
149 static int gre_ioctl(struct ifnet
*, u_long
, void *);
150 static int gre_getsockname(struct socket
*, struct mbuf
*, struct lwp
*);
151 static int gre_getpeername(struct socket
*, struct mbuf
*, struct lwp
*);
152 static int gre_getnames(struct socket
*, struct lwp
*,
153 struct sockaddr_storage
*, struct sockaddr_storage
*);
154 static void gre_clearconf(struct gre_soparm
*, bool);
155 static int gre_soreceive(struct socket
*, struct mbuf
**);
156 static int gre_sosend(struct socket
*, struct mbuf
*);
157 static struct socket
*gre_reconf(struct gre_softc
*, const struct gre_soparm
*);
159 static bool gre_fp_send(struct gre_softc
*, enum gre_msg
, file_t
*);
160 static bool gre_fp_recv(struct gre_softc
*);
161 static void gre_fp_recvloop(void *);
/* nearest_pow2: derive a power of two from len0.
 * The for loop repeatedly clears the lowest set bit of len until at most
 * one bit remains (for non-zero len0 that is its highest set bit).  mid is
 * that bit OR'ed with the bit directly below it.  The (len << 1) < len
 * test detects that doubling len would wrap around.
 * NOTE(review): the return statements and the use made of mid are missing
 * from this listing, so the exact rounding rule cannot be stated from here.
 */
164 nearest_pow2(size_t len0
)
171 for (len
= len0
; (len
& (len
- 1)) != 0; len
&= len
- 1)
174 mid
= len
| (len
>> 1);
177 if ((len
<< 1) < len
)
/* gre_bufq_init: initialise the mbuf ring bq.
 * The requested length len0 is converted with nearest_pow2(), the
 * structure is zeroed, len mbuf-pointer slots are allocated from
 * M_GRE_BUFQ with M_WAITOK, and len - 1 is stored as the index mask.
 * Declared to return a pointer to struct gre_bufq.
 * NOTE(review): the return statement is not present in this listing.
 */
184 static struct gre_bufq
*
185 gre_bufq_init(struct gre_bufq
*bq
, size_t len0
)
189 len
= nearest_pow2(len0
);
191 memset(bq
, 0, sizeof(*bq
));
192 bq
->bq_buf
= malloc(len
* sizeof(struct mbuf
*), M_GRE_BUFQ
, M_WAITOK
);
194 bq
->bq_lenmask
= len
- 1;
/* gre_bufq_empty: the ring is empty exactly when the producer index and
 * the consumer index are equal.
 */
200 gre_bufq_empty(struct gre_bufq
*bq
)
202 return bq
->bq_prodidx
== bq
->bq_considx
;
/* gre_bufq_dequeue: take the mbuf stored at the consumer index and advance
 * that index by one, wrapping it with the bq_lenmask mask.
 * The gre_bufq_empty() check guards the empty-ring case.
 * NOTE(review): the statements executed when the ring is empty and the
 * final return of m are missing from this listing; callers in this file
 * compare the result against NULL.
 */
206 gre_bufq_dequeue(struct gre_bufq
*bq
)
210 if (gre_bufq_empty(bq
))
213 m
= bq
->bq_buf
[bq
->bq_considx
];
214 bq
->bq_considx
= (bq
->bq_considx
+ 1) & bq
->bq_lenmask
;
/* gre_bufq_purge: drain the ring by dequeuing until gre_bufq_dequeue()
 * returns NULL.
 * NOTE(review): the loop body (presumably releasing each mbuf) is missing
 * from this listing - confirm against the real file.
 */
220 gre_bufq_purge(struct gre_bufq
*bq
)
224 while ((m
= gre_bufq_dequeue(bq
)) != NULL
)
/* gre_bufq_enqueue: store m at the producer index and advance that index.
 * next is the producer index plus one, wrapped with bq_lenmask.  When next
 * equals the consumer index the ring is treated as full, so the last free
 * slot is never filled and a full ring stays distinguishable from an empty
 * one (empty means producer == consumer).
 * NOTE(review): the body of the full-ring branch and the return statements
 * are missing from this listing; gre_output treats a non-zero return value
 * as failure.
 */
229 gre_bufq_enqueue(struct gre_bufq
*bq
, struct mbuf
*m
)
233 next
= (bq
->bq_prodidx
+ 1) & bq
->bq_lenmask
;
235 if (next
== bq
->bq_considx
) {
240 bq
->bq_buf
[bq
->bq_prodidx
] = m
;
241 bq
->bq_prodidx
= next
;
/* Send-side handler taking an opaque arg that is cast to the gre_softc.
 * NOTE(review): the function header line is missing from this listing;
 * gre_reconf registers a function named greintr with softint_establish(),
 * which is presumably this one - confirm.
 * Visible behaviour: bumps the sc_send_ev counter, then dequeues every
 * mbuf from sc_snd and hands it to gre_sosend() on the tunnel socket.  In
 * the visible code a failed send is only logged via GRE_DPRINTF.
 */
248 struct gre_softc
*sc
= (struct gre_softc
*)arg
;
249 struct socket
*so
= sc
->sc_soparm
.sp_so
;
255 sc
->sc_send_ev
.ev_count
++;
256 GRE_DPRINTF(sc
, "enter\n");
257 while ((m
= gre_bufq_dequeue(&sc
->sc_snd
)) != NULL
) {
258 /* XXX handle ENOBUFS? */
259 if ((rc
= gre_sosend(so
, m
)) != 0)
260 GRE_DPRINTF(sc
, "gre_sosend failed %d\n", rc
);
/* gre_wait: sleep on sc_condvar with sc_mtx as the interlock.
 * NOTE(review): the lines surrounding cv_wait() are missing from this
 * listing; gre_clone_destroy waits for sc_waiters to reach zero, so that
 * counter is presumably maintained here - confirm.
 */
264 /* Caller must hold sc->sc_mtx. */
266 gre_wait(struct gre_softc
*sc
)
269 cv_wait(&sc
->sc_condvar
, &sc
->sc_mtx
);
/* gre_fp_wait: sleep on sc_fp_condvar with sc_mtx as the interlock.
 * NOTE(review): the lines surrounding cv_wait() are missing from this
 * listing; gre_clone_destroy waits for sc_fp_waiters to reach zero, so
 * that counter is presumably maintained here - confirm.
 */
274 gre_fp_wait(struct gre_softc
*sc
)
277 cv_wait(&sc
->sc_fp_condvar
, &sc
->sc_mtx
);
/* gre_evcnt_detach: detach the seven event counters that
 * gre_evcnt_attach() registers.  The counters that were attached with a
 * parent (unsupp, pullup, error, block under recv; oflow under send) are
 * detached before their respective parent.
 */
282 gre_evcnt_detach(struct gre_softc
*sc
)
284 evcnt_detach(&sc
->sc_unsupp_ev
);
285 evcnt_detach(&sc
->sc_pullup_ev
);
286 evcnt_detach(&sc
->sc_error_ev
);
287 evcnt_detach(&sc
->sc_block_ev
);
288 evcnt_detach(&sc
->sc_recv_ev
);
290 evcnt_detach(&sc
->sc_oflow_ev
);
291 evcnt_detach(&sc
->sc_send_ev
);
/* gre_evcnt_attach: register the per-interface event counters, all of type
 * EVCNT_TYPE_MISC and grouped under the interface name (if_xname):
 *   "recv"  with children "would block", "error", "pullup failed" and
 *           "unsupported" (each passes sc_recv_ev as parent);
 *   "send"  with child "overflow" (passes sc_send_ev as parent).
 */
295 gre_evcnt_attach(struct gre_softc
*sc
)
297 evcnt_attach_dynamic(&sc
->sc_recv_ev
, EVCNT_TYPE_MISC
,
298 NULL
, sc
->sc_if
.if_xname
, "recv");
299 evcnt_attach_dynamic(&sc
->sc_block_ev
, EVCNT_TYPE_MISC
,
300 &sc
->sc_recv_ev
, sc
->sc_if
.if_xname
, "would block");
301 evcnt_attach_dynamic(&sc
->sc_error_ev
, EVCNT_TYPE_MISC
,
302 &sc
->sc_recv_ev
, sc
->sc_if
.if_xname
, "error");
303 evcnt_attach_dynamic(&sc
->sc_pullup_ev
, EVCNT_TYPE_MISC
,
304 &sc
->sc_recv_ev
, sc
->sc_if
.if_xname
, "pullup failed");
305 evcnt_attach_dynamic(&sc
->sc_unsupp_ev
, EVCNT_TYPE_MISC
,
306 &sc
->sc_recv_ev
, sc
->sc_if
.if_xname
, "unsupported");
308 evcnt_attach_dynamic(&sc
->sc_send_ev
, EVCNT_TYPE_MISC
,
309 NULL
, sc
->sc_if
.if_xname
, "send");
310 evcnt_attach_dynamic(&sc
->sc_oflow_ev
, EVCNT_TYPE_MISC
,
311 &sc
->sc_send_ev
, sc
->sc_if
.if_xname
, "overflow");
/* gre_clone_create: cloner callback that builds one gre interface.
 * Visible steps, in order:
 *  - look up a wildcard sockaddr for AF_INET, falling back to AF_INET6;
 *  - allocate a zeroed gre_softc and initialise its mutex (MUTEX_DRIVER at
 *    IPL_SOFTNET) and its two condition variables;
 *  - name the ifnet and fill in its type, header length, DLT, MTU, flags
 *    and the output/ioctl handlers;
 *  - default the tunnel parameters to wildcard source and destination,
 *    protocol IPPROTO_GRE and socket type SOCK_RAW;
 *  - start a kernel thread running gre_fp_recvloop for this softc;
 *  - attach event counters, create the send ring (requested length 17),
 *    set IFF_LINK0, attach the interface and BPF, and mark the softc
 *    GRE_S_IDLE.
 * NOTE(review): the assignment of sp, the handling of a failed wildcard
 * lookup or kthread_create(), and the return statements are missing from
 * this listing.
 */
315 gre_clone_create(struct if_clone
*ifc
, int unit
)
318 struct gre_softc
*sc
;
319 struct gre_soparm
*sp
;
320 const struct sockaddr
*any
;
322 if ((any
= sockaddr_any_by_family(AF_INET
)) == NULL
&&
323 (any
= sockaddr_any_by_family(AF_INET6
)) == NULL
)
326 sc
= malloc(sizeof(struct gre_softc
), M_DEVBUF
, M_WAITOK
|M_ZERO
);
327 mutex_init(&sc
->sc_mtx
, MUTEX_DRIVER
, IPL_SOFTNET
);
328 cv_init(&sc
->sc_condvar
, "gre wait");
329 cv_init(&sc
->sc_fp_condvar
, "gre fp");
331 if_initname(&sc
->sc_if
, ifc
->ifc_name
, unit
);
332 sc
->sc_if
.if_softc
= sc
;
333 sc
->sc_if
.if_type
= IFT_TUNNEL
;
334 sc
->sc_if
.if_addrlen
= 0;
335 sc
->sc_if
.if_hdrlen
= sizeof(struct ip
) + sizeof(struct gre_h
);
336 sc
->sc_if
.if_dlt
= DLT_NULL
;
337 sc
->sc_if
.if_mtu
= GREMTU
;
338 sc
->sc_if
.if_flags
= IFF_POINTOPOINT
|IFF_MULTICAST
;
339 sc
->sc_if
.if_output
= gre_output
;
340 sc
->sc_if
.if_ioctl
= gre_ioctl
;
342 sockaddr_copy(sstosa(&sp
->sp_dst
), sizeof(sp
->sp_dst
), any
);
343 sockaddr_copy(sstosa(&sp
->sp_src
), sizeof(sp
->sp_src
), any
);
344 sp
->sp_proto
= IPPROTO_GRE
;
345 sp
->sp_type
= SOCK_RAW
;
349 rc
= kthread_create(PRI_NONE
, KTHREAD_MPSAFE
, NULL
, gre_fp_recvloop
, sc
,
350 NULL
, sc
->sc_if
.if_xname
);
355 gre_evcnt_attach(sc
);
357 gre_bufq_init(&sc
->sc_snd
, 17);
358 sc
->sc_if
.if_flags
|= IFF_LINK0
;
359 if_attach(&sc
->sc_if
);
360 if_alloc_sadl(&sc
->sc_if
);
362 bpfattach(&sc
->sc_if
, DLT_NULL
, sizeof(uint32_t));
364 sc
->sc_state
= GRE_S_IDLE
;
/* gre_clone_destroy: cloner callback that tears down one gre interface.
 * Visible steps, in order:
 *  - under sc_mtx, loop until sc_state is GRE_S_IDLE, then set GRE_S_DIE
 *    and broadcast on sc_condvar so sleepers notice;
 *  - wait on sc_condvar until sc_waiters drops to zero, release the mutex;
 *  - call gre_reconf(sc, NULL) with the mutex dropped to release the
 *    current socket configuration;
 *  - post GRE_M_STOP in sc_msg, signal sc_fp_condvar and wait until
 *    sc_fp_waiters drops to zero;
 *  - destroy both condition variables and the mutex, detach the counters.
 * NOTE(review): this listing omits several lines (the body of the first
 * wait loop, interface detach and the release of the softc memory among
 * them) - consult the real file before changing the ordering.
 */
369 gre_clone_destroy(struct ifnet
*ifp
)
372 struct gre_softc
*sc
= ifp
->if_softc
;
374 GRE_DPRINTF(sc
, "\n");
382 /* Some LWPs may still wait in gre_ioctl_lock(), however,
383 * no new LWP will enter gre_ioctl_lock(), because ifunit()
384 * cannot locate the interface any longer.
386 mutex_enter(&sc
->sc_mtx
);
387 GRE_DPRINTF(sc
, "\n");
388 while (sc
->sc_state
!= GRE_S_IDLE
)
390 GRE_DPRINTF(sc
, "\n");
391 sc
->sc_state
= GRE_S_DIE
;
392 cv_broadcast(&sc
->sc_condvar
);
393 while (sc
->sc_waiters
> 0)
394 cv_wait(&sc
->sc_condvar
, &sc
->sc_mtx
);
395 /* At this point, no other LWP will access the gre_softc, so
396 * we can release the mutex.
398 mutex_exit(&sc
->sc_mtx
);
399 GRE_DPRINTF(sc
, "\n");
400 /* Note that we must not hold the mutex while we call gre_reconf(). */
401 gre_reconf(sc
, NULL
);
403 mutex_enter(&sc
->sc_mtx
);
404 sc
->sc_msg
= GRE_M_STOP
;
405 cv_signal(&sc
->sc_fp_condvar
);
406 while (sc
->sc_fp_waiters
> 0)
407 cv_wait(&sc
->sc_fp_condvar
, &sc
->sc_mtx
);
408 mutex_exit(&sc
->sc_mtx
);
412 cv_destroy(&sc
->sc_condvar
);
413 cv_destroy(&sc
->sc_fp_condvar
);
414 mutex_destroy(&sc
->sc_mtx
);
415 gre_evcnt_detach(sc
);
/* gre_receive: socket upcall installed by gre_upcall_add; arg is the
 * gre_softc.
 * Visible flow: count the call in sc_recv_ev, fetch one packet with
 * gre_soreceive(), then
 *  - EWOULDBLOCK            -> count in sc_block_ev;
 *  - other error or NULL m  -> count in sc_error_ev;
 *  - GRE header not contiguous and m_pullup() fails -> sc_pullup_ev;
 *  - otherwise pass the packet to gre_input() with hlen 0; a return of 0
 *    is counted in sc_unsupp_ev as an unsupported packet.
 * NOTE(review): the statements that leave each branch (returns, mbuf
 * release) are missing from this listing.
 */
422 gre_receive(struct socket
*so
, void *arg
, int events
, int waitflag
)
424 struct gre_softc
*sc
= (struct gre_softc
*)arg
;
426 const struct gre_h
*gh
;
429 GRE_DPRINTF(sc
, "enter\n");
431 sc
->sc_recv_ev
.ev_count
++;
433 rc
= gre_soreceive(so
, &m
);
434 /* TBD Back off if ECONNREFUSED (indicates
435 * ICMP Port Unreachable)?
437 if (rc
== EWOULDBLOCK
) {
438 GRE_DPRINTF(sc
, "EWOULDBLOCK\n");
439 sc
->sc_block_ev
.ev_count
++;
441 } else if (rc
!= 0 || m
== NULL
) {
442 GRE_DPRINTF(sc
, "%s: rc %d m %p\n",
443 sc
->sc_if
.if_xname
, rc
, (void *)m
);
444 sc
->sc_error_ev
.ev_count
++;
447 if (m
->m_len
< sizeof(*gh
) && (m
= m_pullup(m
, sizeof(*gh
))) == NULL
) {
448 GRE_DPRINTF(sc
, "m_pullup failed\n");
449 sc
->sc_pullup_ev
.ev_count
++;
452 gh
= mtod(m
, const struct gre_h
*);
454 if (gre_input(sc
, m
, 0, gh
) == 0) {
455 sc
->sc_unsupp_ev
.ev_count
++;
456 GRE_DPRINTF(sc
, "dropping unsupported\n");
/* gre_upcall_add: install gre_receive as the receive upcall of so, with
 * arg as the upcall argument, and set SB_UPCALL on the receive buffer.
 * Asserts that no upcall flag is already set on so_rcv.
 */
462 gre_upcall_add(struct socket
*so
, void *arg
)
464 /* XXX What if the kernel already set an upcall? */
465 KASSERT((so
->so_rcv
.sb_flags
& SB_UPCALL
) == 0);
466 so
->so_upcallarg
= arg
;
467 so
->so_upcall
= gre_receive
;
468 so
->so_rcv
.sb_flags
|= SB_UPCALL
;
/* gre_upcall_remove: undo gre_upcall_add - clear SB_UPCALL on the receive
 * buffer first, then reset the upcall argument and function to NULL.
 */
472 gre_upcall_remove(struct socket
*so
)
474 so
->so_rcv
.sb_flags
&= ~SB_UPCALL
;
475 so
->so_upcallarg
= NULL
;
476 so
->so_upcall
= NULL
;
/* gre_socreate: create and set up the tunnel socket described by sp.
 * Visible steps, in order:
 *  - fsocreate() a socket in the address family of sp_src with sp_type and
 *    sp_proto, obtaining a file descriptor, then fd_getsock() on it;
 *  - get one MT_SONAME mbuf, copy sp_src into it and sobind();
 *  - reuse the same mbuf for sp_dst and soconnect();
 *  - set IP_TTL from ip_gre_ttl and set SO_NOHEADER.
 * fdout is an output parameter; NOTE(review): where it is written, the
 * conditions guarding each option, the error-path cleanup and the return
 * value are missing from this listing.
 */
480 gre_socreate(struct gre_softc
*sc
, const struct gre_soparm
*sp
, int *fdout
)
482 const struct protosw
*pr
;
490 GRE_DPRINTF(sc
, "enter\n");
492 af
= sp
->sp_src
.ss_family
;
493 rc
= fsocreate(af
, NULL
, sp
->sp_type
, sp
->sp_proto
, curlwp
, &fd
);
495 GRE_DPRINTF(sc
, "fsocreate failed\n");
499 if ((rc
= fd_getsock(fd
, &so
)) != 0)
502 if ((m
= getsombuf(so
, MT_SONAME
)) == NULL
) {
506 sa
= mtod(m
, struct sockaddr
*);
507 sockaddr_copy(sa
, MIN(MLEN
, sizeof(sp
->sp_src
)), sstocsa(&sp
->sp_src
));
508 m
->m_len
= sp
->sp_src
.ss_len
;
510 if ((rc
= sobind(so
, m
, curlwp
)) != 0) {
511 GRE_DPRINTF(sc
, "sobind failed\n");
515 sockaddr_copy(sa
, MIN(MLEN
, sizeof(sp
->sp_dst
)), sstocsa(&sp
->sp_dst
));
516 m
->m_len
= sp
->sp_dst
.ss_len
;
519 if ((rc
= soconnect(so
, m
, curlwp
)) != 0) {
520 GRE_DPRINTF(sc
, "soconnect failed\n");
528 /* XXX convert to a (new) SOL_SOCKET call */
531 rc
= so_setsockopt(curlwp
, so
, IPPROTO_IP
, IP_TTL
,
532 &ip_gre_ttl
, sizeof(ip_gre_ttl
));
534 GRE_DPRINTF(sc
, "so_setsockopt ttl failed\n");
539 rc
= so_setsockopt(curlwp
, so
, SOL_SOCKET
, SO_NOHEADER
,
542 GRE_DPRINTF(sc
, "so_setsockopt SO_NOHEADER failed\n");
/* gre_sosend: send the prepackaged mbuf chain top through socket so.
 * Visible flow: take the send-buffer lock with sblock(M_NOWAIT), check
 * SS_CANTSENDMORE, a pending so_error and the connection state (with
 * EDESTADDRREQ as one failure), compare the packet length against the
 * send-buffer high-water mark, then issue PRU_SEND through the protocol's
 * pr_usrreq and release the lock with sbunlock().
 * The local snderr() macro records an error code and jumps to the release
 * label.
 * NOTE(review): many lines (error codes of the other branches, the release
 * label, mbuf disposal, the return) are missing from this listing.
 */
559 gre_sosend(struct socket
*so
, struct mbuf
*top
)
565 struct lwp
* const l
= curlwp
;
569 resid
= top
->m_pkthdr
.len
;
572 #define snderr(errno) { error = errno; goto release; }
575 if ((error
= sblock(&so
->so_snd
, M_NOWAIT
)) != 0)
577 if (so
->so_state
& SS_CANTSENDMORE
)
580 error
= so
->so_error
;
584 if ((so
->so_state
& SS_ISCONNECTED
) == 0) {
585 if (so
->so_proto
->pr_flags
& PR_CONNREQUIRED
) {
586 if ((so
->so_state
& SS_ISCONFIRMING
) == 0)
589 snderr(EDESTADDRREQ
);
591 space
= sbspace(&so
->so_snd
);
592 if (resid
> so
->so_snd
.sb_hiwat
)
598 * Data is prepackaged in "top".
600 if (so
->so_state
& SS_CANTSENDMORE
)
602 error
= (*so
->so_proto
->pr_usrreq
)(so
, PRU_SEND
, top
, NULL
, NULL
, l
);
606 sbunlock(&so
->so_snd
);
/* gre_soreceive: receive from so without blocking, handing mbufs back
 * through mp0 (asserted non-NULL).
 * Visible facts: flags is fixed to MSG_DONTWAIT; the protocol must have
 * PR_ATOMIC set (asserted); the receive buffer is locked with
 * sblock(M_NOWAIT) and released with sbunlock(); a leading address mbuf
 * (when the protocol sets PR_ADDR) and any MT_CONTROL mbufs are freed from
 * the buffer rather than returned, with SCM_RIGHTS messages passed to the
 * domain's dom_dispose hook first; sb_mb and sb_lastrecord are kept
 * consistent as mbufs are unlinked; PRU_RCVD is issued when the protocol
 * sets PR_WANTRCVD and the socket still has a pcb.
 * NOTE(review): this listing omits a large share of the function (labels,
 * loop headers, else branches, error codes, the return), so the control
 * flow cannot be reconstructed from here; use the real file.
 */
614 /* This is a stripped-down version of soreceive() that will never
615 * block. It will support SOCK_DGRAM sockets. It may also support
616 * SOCK_SEQPACKET sockets.
619 gre_soreceive(struct socket
*so
, struct mbuf
**mp0
)
621 struct mbuf
*m
, **mp
;
622 int flags
, len
, error
, type
;
623 const struct protosw
*pr
;
624 struct mbuf
*nextrecord
;
626 KASSERT(mp0
!= NULL
);
628 flags
= MSG_DONTWAIT
;
635 KASSERT(pr
->pr_flags
& PR_ATOMIC
);
637 if (so
->so_state
& SS_ISCONFIRMING
)
638 (*pr
->pr_usrreq
)(so
, PRU_RCVD
, NULL
, NULL
, NULL
, curlwp
);
640 if ((error
= sblock(&so
->so_rcv
, M_NOWAIT
)) != 0) {
643 m
= so
->so_rcv
.sb_mb
;
645 * If we have less data than requested, do not block awaiting more.
649 if (so
->so_rcv
.sb_cc
)
653 error
= so
->so_error
;
655 } else if (so
->so_state
& SS_CANTRCVMORE
)
657 else if ((so
->so_state
& (SS_ISCONNECTED
|SS_ISCONNECTING
)) == 0
658 && (so
->so_proto
->pr_flags
& PR_CONNREQUIRED
))
665 * On entry here, m points to the first record of the socket buffer.
666 * While we process the initial mbufs containing address and control
667 * info, we save a copy of m->m_nextpkt into nextrecord.
670 curlwp
->l_ru
.ru_msgrcv
++;
671 KASSERT(m
== so
->so_rcv
.sb_mb
);
672 SBLASTRECORDCHK(&so
->so_rcv
, "soreceive 1");
673 SBLASTMBUFCHK(&so
->so_rcv
, "soreceive 1");
674 nextrecord
= m
->m_nextpkt
;
675 if (pr
->pr_flags
& PR_ADDR
) {
677 if (m
->m_type
!= MT_SONAME
)
680 sbfree(&so
->so_rcv
, m
);
681 MFREE(m
, so
->so_rcv
.sb_mb
);
682 m
= so
->so_rcv
.sb_mb
;
684 while (m
!= NULL
&& m
->m_type
== MT_CONTROL
&& error
== 0) {
685 sbfree(&so
->so_rcv
, m
);
687 * Dispose of any SCM_RIGHTS message that went
688 * through the read path rather than recv.
690 if (pr
->pr_domain
->dom_dispose
&&
691 mtod(m
, struct cmsghdr
*)->cmsg_type
== SCM_RIGHTS
)
692 (*pr
->pr_domain
->dom_dispose
)(m
);
693 MFREE(m
, so
->so_rcv
.sb_mb
);
694 m
= so
->so_rcv
.sb_mb
;
698 * If m is non-NULL, we have some data to read. From now on,
699 * make sure to keep sb_lastrecord consistent when working on
700 * the last packet on the chain (nextrecord == NULL) and we
701 * change m->m_nextpkt.
704 m
->m_nextpkt
= nextrecord
;
706 * If nextrecord == NULL (this is a single chain),
707 * then sb_lastrecord may not be valid here if m
708 * was changed earlier.
710 if (nextrecord
== NULL
) {
711 KASSERT(so
->so_rcv
.sb_mb
== m
);
712 so
->so_rcv
.sb_lastrecord
= m
;
715 if (type
== MT_OOBDATA
)
718 KASSERT(so
->so_rcv
.sb_mb
== m
);
719 so
->so_rcv
.sb_mb
= nextrecord
;
720 SB_EMPTY_FIXUP(&so
->so_rcv
);
722 SBLASTRECORDCHK(&so
->so_rcv
, "soreceive 2");
723 SBLASTMBUFCHK(&so
->so_rcv
, "soreceive 2");
726 if (m
->m_type
== MT_OOBDATA
) {
727 if (type
!= MT_OOBDATA
)
729 } else if (type
== MT_OOBDATA
)
732 else if (m
->m_type
!= MT_DATA
&& m
->m_type
!= MT_HEADER
)
735 so
->so_state
&= ~SS_RCVATMARK
;
736 if (so
->so_oobmark
!= 0 && so
->so_oobmark
< m
->m_len
)
740 * mp is set, just pass back the mbufs.
741 * Sockbuf must be consistent here (points to current mbuf,
742 * it points to next record) when we drop priority;
743 * we must note any additions to the sockbuf when we
744 * block interrupts again.
746 if (m
->m_flags
& M_EOR
)
748 nextrecord
= m
->m_nextpkt
;
749 sbfree(&so
->so_rcv
, m
);
752 so
->so_rcv
.sb_mb
= m
= m
->m_next
;
755 * If m != NULL, we also know that
756 * so->so_rcv.sb_mb != NULL.
758 KASSERT(so
->so_rcv
.sb_mb
== m
);
760 m
->m_nextpkt
= nextrecord
;
761 if (nextrecord
== NULL
)
762 so
->so_rcv
.sb_lastrecord
= m
;
764 so
->so_rcv
.sb_mb
= nextrecord
;
765 SB_EMPTY_FIXUP(&so
->so_rcv
);
767 SBLASTRECORDCHK(&so
->so_rcv
, "soreceive 3");
768 SBLASTMBUFCHK(&so
->so_rcv
, "soreceive 3");
769 if (so
->so_oobmark
) {
770 so
->so_oobmark
-= len
;
771 if (so
->so_oobmark
== 0) {
772 so
->so_state
|= SS_RCVATMARK
;
784 (void) sbdroprecord(&so
->so_rcv
);
787 * First part is an inline SB_EMPTY_FIXUP(). Second
788 * part makes sure sb_lastrecord is up-to-date if
789 * there is still data in the socket buffer.
791 so
->so_rcv
.sb_mb
= nextrecord
;
792 if (so
->so_rcv
.sb_mb
== NULL
) {
793 so
->so_rcv
.sb_mbtail
= NULL
;
794 so
->so_rcv
.sb_lastrecord
= NULL
;
795 } else if (nextrecord
->m_nextpkt
== NULL
)
796 so
->so_rcv
.sb_lastrecord
= nextrecord
;
798 SBLASTRECORDCHK(&so
->so_rcv
, "soreceive 4");
799 SBLASTMBUFCHK(&so
->so_rcv
, "soreceive 4");
800 if (pr
->pr_flags
& PR_WANTRCVD
&& so
->so_pcb
)
801 (*pr
->pr_usrreq
)(so
, PRU_RCVD
, NULL
,
802 (struct mbuf
*)(long)flags
, NULL
, curlwp
);
803 if (*mp0
== NULL
&& (flags
& MSG_EOR
) == 0 &&
804 (so
->so_state
& SS_CANTRCVMORE
) == 0) {
805 sbunlock(&so
->so_rcv
);
810 sbunlock(&so
->so_rcv
);
/* gre_reconf: switch the softc from its current socket configuration to
 * newsoparm (NULL means tear down only) and return the resulting socket
 * pointer, sc_soparm.sp_so.
 * Visible steps, in order:
 *  - if a socket is currently set: remove the receive upcall, disestablish
 *    the soft interrupt, send GRE_M_DELFP to the fp thread and clear the
 *    configuration (non-forced);
 *  - if newsoparm is non-NULL, copy it into sc_soparm;
 *  - if a socket is now set: establish the greintr soft interrupt at
 *    SOFTINT_NET and install the receive upcall;
 *  - set IFF_RUNNING when a socket is present; the visible code also
 *    purges the send ring and clears IFF_RUNNING.
 * NOTE(review): the else/brace lines are missing from this listing, so
 * which of the last statements belong to which branch cannot be told from
 * here.
 */
814 static struct socket
*
815 gre_reconf(struct gre_softc
*sc
, const struct gre_soparm
*newsoparm
)
817 struct ifnet
*ifp
= &sc
->sc_if
;
819 GRE_DPRINTF(sc
, "enter\n");
822 if (sc
->sc_soparm
.sp_so
!= NULL
) {
823 GRE_DPRINTF(sc
, "\n");
824 gre_upcall_remove(sc
->sc_soparm
.sp_so
);
825 softint_disestablish(sc
->sc_si
);
827 gre_fp_send(sc
, GRE_M_DELFP
, NULL
);
828 gre_clearconf(&sc
->sc_soparm
, false);
831 if (newsoparm
!= NULL
) {
832 GRE_DPRINTF(sc
, "\n");
833 sc
->sc_soparm
= *newsoparm
;
837 if (sc
->sc_soparm
.sp_so
!= NULL
) {
838 GRE_DPRINTF(sc
, "\n");
839 sc
->sc_si
= softint_establish(SOFTINT_NET
, greintr
, sc
);
840 gre_upcall_add(sc
->sc_soparm
.sp_so
, sc
);
841 if ((ifp
->if_flags
& IFF_UP
) == 0) {
842 GRE_DPRINTF(sc
, "down\n");
847 GRE_DPRINTF(sc
, "\n");
848 if (sc
->sc_soparm
.sp_so
!= NULL
)
849 sc
->sc_if
.if_flags
|= IFF_RUNNING
;
851 gre_bufq_purge(&sc
->sc_snd
);
852 sc
->sc_if
.if_flags
&= ~IFF_RUNNING
;
854 return sc
->sc_soparm
.sp_so
;
/* gre_input: process one received GRE packet m whose GRE header is gh;
 * hlen is the number of bytes preceding the GRE header.
 * Visible steps: update the interface input packet and byte counters, add
 * the fixed GRE header size to hlen, read the flag word in host order and
 * test GRE_CP and GRE_RP (packets with GRE_RP count as input errors since
 * routing fields are not supported), dispatch on the ethertype in
 * gh->ptype (unknown types bump if_noproto), count an input error when
 * hlen exceeds the packet length, tap the packet to BPF with the chosen
 * address family, and set the receive interface.
 * The visible return value 1 means the packet was consumed; gre_receive
 * treats a return of 0 as an unsupported packet.
 * NOTE(review): most case labels, the header-stripping code, the queueing
 * code and the other return statements are missing from this listing.
 */
858 gre_input(struct gre_softc
*sc
, struct mbuf
*m
, int hlen
,
859 const struct gre_h
*gh
)
862 uint32_t af
; /* af passed to BPF tap */
866 sc
->sc_if
.if_ipackets
++;
867 sc
->sc_if
.if_ibytes
+= m
->m_pkthdr
.len
;
869 hlen
+= sizeof(struct gre_h
);
871 /* process GRE flags as packet can be of variable len */
872 flags
= ntohs(gh
->flags
);
874 /* Checksum & Offset are present */
875 if ((flags
& GRE_CP
) | (flags
& GRE_RP
))
877 /* We don't support routing fields (variable length) */
878 if (flags
& GRE_RP
) {
879 sc
->sc_if
.if_ierrors
++;
887 switch (ntohs(gh
->ptype
)) { /* ethertypes */
896 case ETHERTYPE_ATALK
:
909 default: /* others not yet supported */
910 GRE_DPRINTF(sc
, "unhandled ethertype 0x%04x\n",
912 sc
->sc_if
.if_noproto
++;
916 if (hlen
> m
->m_pkthdr
.len
) {
918 sc
->sc_if
.if_ierrors
++;
924 if (sc
->sc_if
.if_bpf
!= NULL
)
925 bpf_mtap_af(sc
->sc_if
.if_bpf
, af
, m
);
926 #endif /*NBPFILTER > 0*/
928 m
->m_pkthdr
.rcvif
= &sc
->sc_if
;
937 /* we need schednetisr since the address family may change */
941 return 1; /* packet is done, no further processing needed */
/* gre_output: if_output handler of the gre interface.
 * Visible steps, in order:
 *  - test that the interface is both IFF_UP and IFF_RUNNING;
 *  - tap the packet to BPF with the destination address family;
 *  - clear M_BCAST and M_MCAST on the mbuf;
 *  - choose the GRE ethertype from dst->sa_family (ETHERTYPE_IP,
 *    ETHERTYPE_ATALK or ETHERTYPE_IPV6); other families are dropped with
 *    EAFNOSUPPORT;
 *  - prepend room for the GRE header with M_PREPEND(M_DONTWAIT), counting
 *    a drop on failure;
 *  - add the packet length to if_obytes, enqueue on the sc_snd ring
 *    (failure bumps sc_oflow_ev) and schedule the sc_si soft interrupt.
 * NOTE(review): the case labels, the filling of the GRE header fields, the
 * failure branches and the return are missing from this listing.
 */
945 * The output routine. Takes a packet and encapsulates it in the protocol
946 * given by sc->sc_soparm.sp_proto. See also RFC 1701 and RFC 2004
949 gre_output(struct ifnet
*ifp
, struct mbuf
*m
, const struct sockaddr
*dst
,
953 struct gre_softc
*sc
= ifp
->if_softc
;
957 if ((ifp
->if_flags
& (IFF_UP
|IFF_RUNNING
)) != (IFF_UP
|IFF_RUNNING
)) {
964 if (ifp
->if_bpf
!= NULL
)
965 bpf_mtap_af(ifp
->if_bpf
, dst
->sa_family
, m
);
968 m
->m_flags
&= ~(M_BCAST
|M_MCAST
);
970 GRE_DPRINTF(sc
, "dst->sa_family=%d\n", dst
->sa_family
);
971 switch (dst
->sa_family
) {
974 /* TBD Extract the IP ToS field and set the
975 * encapsulating protocol's ToS to suit.
977 etype
= htons(ETHERTYPE_IP
);
982 etype
= htons(ETHERTYPE_ATALK
);
987 etype
= htons(ETHERTYPE_IPV6
);
991 IF_DROP(&ifp
->if_snd
);
993 error
= EAFNOSUPPORT
;
997 M_PREPEND(m
, sizeof(*gh
), M_DONTWAIT
);
1000 IF_DROP(&ifp
->if_snd
);
1005 gh
= mtod(m
, struct gre_h
*);
1008 /* XXX Need to handle IP ToS. Look at how I handle IP TTL. */
1011 ifp
->if_obytes
+= m
->m_pkthdr
.len
;
1014 if ((error
= gre_bufq_enqueue(&sc
->sc_snd
, m
)) != 0) {
1015 sc
->sc_oflow_ev
.ev_count
++;
1018 softint_schedule(sc
->sc_si
);
/* gre_getname: issue the user request req on so through the protocol's
 * pr_usrreq hook, passing nam as the address mbuf and l as the calling
 * LWP, and return the hook's result.
 */
1026 gre_getname(struct socket
*so
, int req
, struct mbuf
*nam
, struct lwp
*l
)
1028 return (*so
->so_proto
->pr_usrreq
)(so
, req
, NULL
, nam
, NULL
, l
);
/* gre_getsockname: fetch the local address of so into nam by issuing
 * PRU_SOCKADDR through gre_getname().
 */
1032 gre_getsockname(struct socket
*so
, struct mbuf
*nam
, struct lwp
*l
)
1034 return gre_getname(so
, PRU_SOCKADDR
, nam
, l
);
/* gre_getpeername: fetch the peer address of so into nam by issuing
 * PRU_PEERADDR through gre_getname().
 */
1038 gre_getpeername(struct socket
*so
, struct mbuf
*nam
, struct lwp
*l
)
1040 return gre_getname(so
, PRU_PEERADDR
, nam
, l
);
/* gre_getnames: query both the local and the peer address of so.
 * One MT_SONAME mbuf is obtained with getsombuf() and viewed as a
 * sockaddr_storage; gre_getsockname() and then gre_getpeername() are
 * called with that same mbuf.
 * NOTE(review): the copies into src and dst, the release of the mbuf and
 * the return values are missing from this listing.
 */
1044 gre_getnames(struct socket
*so
, struct lwp
*l
, struct sockaddr_storage
*src
,
1045 struct sockaddr_storage
*dst
)
1048 struct sockaddr_storage
*ss
;
1051 if ((m
= getsombuf(so
, MT_SONAME
)) == NULL
)
1054 ss
= mtod(m
, struct sockaddr_storage
*);
1057 if ((rc
= gre_getsockname(so
, m
, l
)) != 0)
1061 if ((rc
= gre_getpeername(so
, m
, l
)) != 0)
/* gre_fp_recvloop: entry point of the kernel thread started by
 * gre_clone_create; arg is the gre_softc.  Takes sc_mtx and calls
 * gre_fp_recv() repeatedly for as long as it returns true, then releases
 * the mutex.
 * NOTE(review): the loop body and the thread exit call are missing from
 * this listing.
 */
1071 gre_fp_recvloop(void *arg
)
1073 struct gre_softc
*sc
= arg
;
1075 mutex_enter(&sc
->sc_mtx
);
1076 while (gre_fp_recv(sc
))
1078 mutex_exit(&sc
->sc_mtx
);
/* gre_fp_recv: handle one message posted in sc_msg for the fp thread.
 * Visible behaviour: dispatches on sc_msg; one path duplicates a file
 * reference with fd_dup() while sc_mtx is dropped and reports GRE_M_ERR,
 * another path looks up the previously held descriptor with fd_getfile()
 * while sc_mtx is dropped; GRE_M_OK is reported on success and
 * sc_fp_condvar is signalled so the sender can pick up the reply.
 * NOTE(review): every case label, the wait for a message, the descriptor
 * bookkeeping and the boolean return are missing from this listing, so the
 * mapping of statements to message types is not visible here.
 */
1083 gre_fp_recv(struct gre_softc
*sc
)
1092 switch (sc
->sc_msg
) {
1094 cv_signal(&sc
->sc_fp_condvar
);
1097 mutex_exit(&sc
->sc_mtx
);
1098 rc
= fd_dup(fp
, 0, &fd
, 0);
1099 mutex_enter(&sc
->sc_mtx
);
1101 sc
->sc_msg
= GRE_M_ERR
;
1106 mutex_exit(&sc
->sc_mtx
);
1107 if (ofd
!= -1 && fd_getfile(ofd
) != NULL
)
1109 mutex_enter(&sc
->sc_mtx
);
1111 sc
->sc_msg
= GRE_M_OK
;
1117 cv_signal(&sc
->sc_fp_condvar
);
/* gre_fp_send: post message msg (with optional file fp) to the fp thread
 * and wait for its reply.
 * Visible protocol, all under sc_mtx:
 *  - wait until sc_msg is GRE_M_NONE (mailbox free);
 *  - signal sc_fp_condvar, then wait until sc_msg becomes one of
 *    GRE_M_STOP, GRE_M_OK or GRE_M_ERR;
 *  - the result is true unless the reply is GRE_M_ERR;
 *  - reset sc_msg to GRE_M_NONE and signal again to free the mailbox.
 * NOTE(review): the lines that store msg and fp into the softc, the wait
 * calls inside the loops and the return are missing from this listing.
 */
1122 gre_fp_send(struct gre_softc
*sc
, enum gre_msg msg
, file_t
*fp
)
1126 mutex_enter(&sc
->sc_mtx
);
1127 while (sc
->sc_msg
!= GRE_M_NONE
)
1131 cv_signal(&sc
->sc_fp_condvar
);
1132 while (sc
->sc_msg
!= GRE_M_STOP
&& sc
->sc_msg
!= GRE_M_OK
&&
1133 sc
->sc_msg
!= GRE_M_ERR
)
1135 rc
= (sc
->sc_msg
!= GRE_M_ERR
);
1136 sc
->sc_msg
= GRE_M_NONE
;
1137 cv_signal(&sc
->sc_fp_condvar
);
1138 mutex_exit(&sc
->sc_mtx
);
/* gre_ssock: adopt the socket behind file descriptor fd as the tunnel
 * socket for the parameters in sp.
 * Visible checks and steps:
 *  - fd must resolve to a file of type DTYPE_SOCKET;
 *  - the socket's protocol must have PR_ATOMIC set, and its type and
 *    protocol number must match sp_type and sp_proto where those are
 *    non-zero (a socket protocol number of 0 is also accepted);
 *  - the local and peer addresses are read with gre_getnames();
 *  - the file is handed to the fp thread with GRE_M_SETFP.
 * NOTE(review): the error codes, the release of the file reference, the
 * updates written to sp and the return are missing from this listing.
 */
1143 gre_ssock(struct ifnet
*ifp
, struct gre_soparm
*sp
, int fd
)
1146 const struct protosw
*pr
;
1148 struct gre_softc
*sc
= ifp
->if_softc
;
1150 struct sockaddr_storage dst
, src
;
1152 if ((fp
= fd_getfile(fd
)) == NULL
)
1154 if (fp
->f_type
!= DTYPE_SOCKET
) {
1159 GRE_DPRINTF(sc
, "\n");
1161 so
= (struct socket
*)fp
->f_data
;
1164 GRE_DPRINTF(sc
, "type %d, proto %d\n", pr
->pr_type
, pr
->pr_protocol
);
1166 if ((pr
->pr_flags
& PR_ATOMIC
) == 0 ||
1167 (sp
->sp_type
!= 0 && pr
->pr_type
!= sp
->sp_type
) ||
1168 (sp
->sp_proto
!= 0 && pr
->pr_protocol
!= 0 &&
1169 pr
->pr_protocol
!= sp
->sp_proto
)) {
1174 GRE_DPRINTF(sc
, "\n");
1177 if ((error
= gre_getnames(so
, curlwp
, &src
, &dst
)) != 0)
1180 GRE_DPRINTF(sc
, "\n");
1182 if (!gre_fp_send(sc
, GRE_M_SETFP
, fp
)) {
1187 GRE_DPRINTF(sc
, "\n");
/* sockaddr_is_anyaddr: report whether the address part of sa equals the
 * wildcard address of its family.  The wildcard bytes come from
 * sockaddr_anyaddr() and the address bytes from sockaddr_const_addr();
 * they are compared with memcmp() over the shorter of the two lengths.
 * NOTE(review): the statement executed when either lookup returns NULL is
 * missing from this listing.
 */
1200 sockaddr_is_anyaddr(const struct sockaddr
*sa
)
1202 socklen_t anylen
, salen
;
1203 const void *anyaddr
, *addr
;
1205 if ((anyaddr
= sockaddr_anyaddr(sa
, &anylen
)) == NULL
||
1206 (addr
= sockaddr_const_addr(sa
, &salen
)) == NULL
)
1212 return memcmp(anyaddr
, addr
, MIN(anylen
, salen
)) == 0;
/* gre_is_nullconf: a tunnel configuration is "null" when its source or its
 * destination address is the wildcard address.
 */
1216 gre_is_nullconf(const struct gre_soparm
*sp
)
1218 return sockaddr_is_anyaddr(sstocsa(&sp
->sp_src
)) ||
1219 sockaddr_is_anyaddr(sstocsa(&sp
->sp_dst
));
/* gre_clearconf: reset tunnel parameters.
 * When the configuration was set from a socket (sp_bysock) or force is
 * true, the source and destination are each overwritten with the wildcard
 * address of their own family and sp_bysock is cleared.  The socket
 * pointer sp_so is set to NULL.
 * NOTE(review): the closing brace of the if is missing from this listing,
 * so whether sp_so is cleared unconditionally cannot be told from here.
 */
1223 gre_clearconf(struct gre_soparm
*sp
, bool force
)
1225 if (sp
->sp_bysock
|| force
) {
1226 sockaddr_copy(sstosa(&sp
->sp_src
), sizeof(sp
->sp_src
),
1227 sockaddr_any(sstosa(&sp
->sp_src
)));
1228 sockaddr_copy(sstosa(&sp
->sp_dst
), sizeof(sp
->sp_dst
),
1229 sockaddr_any(sstosa(&sp
->sp_dst
)));
1230 sp
->sp_bysock
= false;
1232 sp
->sp_so
= NULL
; /* XXX */
/* gre_ioctl_lock: serialise ioctl processing on the softc.
 * Under sc_mtx: loop while another ioctl holds the state (GRE_S_IOCTL);
 * if the state is then anything other than GRE_S_IDLE, signal sc_condvar,
 * drop the mutex and fail; otherwise claim the state by setting
 * GRE_S_IOCTL and drop the mutex.
 * gre_ioctl treats a non-zero return as an error.
 * NOTE(review): the wait call inside the loop and the returned values are
 * missing from this listing.
 */
1236 gre_ioctl_lock(struct gre_softc
*sc
)
1238 mutex_enter(&sc
->sc_mtx
);
1240 while (sc
->sc_state
== GRE_S_IOCTL
)
1243 if (sc
->sc_state
!= GRE_S_IDLE
) {
1244 cv_signal(&sc
->sc_condvar
);
1245 mutex_exit(&sc
->sc_mtx
);
1246 GRE_DPRINTF(sc
, "\n");
1250 sc
->sc_state
= GRE_S_IOCTL
;
1252 mutex_exit(&sc
->sc_mtx
);
/* gre_ioctl_unlock: release the ioctl state claimed by gre_ioctl_lock().
 * Under sc_mtx: assert the state is GRE_S_IOCTL, set it back to
 * GRE_S_IDLE and signal sc_condvar to wake one waiter.
 */
1257 gre_ioctl_unlock(struct gre_softc
*sc
)
1259 mutex_enter(&sc
->sc_mtx
);
1261 KASSERT(sc
->sc_state
== GRE_S_IOCTL
);
1262 sc
->sc_state
= GRE_S_IDLE
;
1263 cv_signal(&sc
->sc_condvar
);
1265 mutex_exit(&sc
->sc_mtx
);
/* gre_ioctl: if_ioctl handler of the gre interface.
 * Visible outline:
 *  - privileged commands (SIOCSLIFPHYADDR and SIOCDIFPHYADDR are among the
 *    visible labels) are checked with kauth_authorize_network();
 *  - the ioctl state is claimed with gre_ioctl_lock() and the current
 *    sc_soparm is copied into the local sp0;
 *  - the command switch edits the tunnel parameters: interface flags
 *    select between IPPROTO_UDP/SOCK_DGRAM and IPPROTO_GRE/SOCK_RAW, an
 *    MTU below 576 is tested for, the protocol can be set or read back
 *    through ifr_flags, source and destination can be set or read through
 *    the ifreq or if_laddrreq, and a descriptor can be supplied through
 *    ifr_value with sp_bysock set;
 *  - when the interface is up and the configuration is not null, a socket
 *    is created with gre_socreate() and adopted with gre_ssock();
 *  - unhandled commands go to ifioctl_common(), and the ioctl state is
 *    released with gre_ioctl_unlock().
 * NOTE(review): most case labels, break/goto statements, error codes and
 * the call that applies the new parameters are missing from this listing,
 * so the pairing of statements with commands is largely not visible here.
 */
1269 gre_ioctl(struct ifnet
*ifp
, const u_long cmd
, void *data
)
1272 struct if_laddrreq
*lifr
= (struct if_laddrreq
*)data
;
1273 struct gre_softc
*sc
= ifp
->if_softc
;
1274 struct gre_soparm
*sp
;
1275 int fd
, error
= 0, oproto
, otype
, s
;
1276 struct gre_soparm sp0
;
1280 GRE_DPRINTF(sc
, "cmd %lu\n", cmd
);
1290 case SIOCSLIFPHYADDR
:
1291 case SIOCDIFPHYADDR
:
1292 if (kauth_authorize_network(curlwp
->l_cred
,
1293 KAUTH_NETWORK_INTERFACE
,
1294 KAUTH_REQ_NETWORK_INTERFACE_SETPRIV
, ifp
, (void *)cmd
,
1302 if ((error
= gre_ioctl_lock(sc
)) != 0) {
1303 GRE_DPRINTF(sc
, "\n");
1308 sp0
= sc
->sc_soparm
;
1312 GRE_DPRINTF(sc
, "\n");
1315 case SIOCINITIFADDR
:
1316 GRE_DPRINTF(sc
, "\n");
1317 if ((ifp
->if_flags
& IFF_UP
) != 0)
1319 gre_clearconf(sp
, false);
1320 ifp
->if_flags
|= IFF_UP
;
1322 case SIOCSIFDSTADDR
:
1325 if ((error
= ifioctl_common(ifp
, cmd
, data
)) != 0)
1327 oproto
= sp
->sp_proto
;
1328 otype
= sp
->sp_type
;
1329 switch (ifr
->ifr_flags
& (IFF_LINK0
|IFF_LINK2
)) {
1330 case IFF_LINK0
|IFF_LINK2
:
1331 sp
->sp_proto
= IPPROTO_UDP
;
1332 sp
->sp_type
= SOCK_DGRAM
;
1339 sp
->sp_proto
= IPPROTO_GRE
;
1340 sp
->sp_type
= SOCK_RAW
;
1343 GRE_DPRINTF(sc
, "\n");
1347 GRE_DPRINTF(sc
, "\n");
1348 gre_clearconf(sp
, false);
1349 if ((ifp
->if_flags
& (IFF_UP
|IFF_RUNNING
)) ==
1350 (IFF_UP
|IFF_RUNNING
) &&
1351 (oproto
== sp
->sp_proto
|| sp
->sp_proto
== 0) &&
1352 (otype
== sp
->sp_type
|| sp
->sp_type
== 0))
1354 switch (sp
->sp_proto
) {
1363 /* XXX determine MTU automatically by probing w/
1364 * XXX do-not-fragment packets?
1366 if (ifr
->ifr_mtu
< 576) {
1372 if ((error
= ifioctl_common(ifp
, cmd
, data
)) == ENETRESET
)
1378 error
= EAFNOSUPPORT
;
1381 switch (ifreq_getaddr(cmd
, ifr
)->sa_family
) {
1391 error
= EAFNOSUPPORT
;
1396 gre_clearconf(sp
, false);
1397 oproto
= sp
->sp_proto
;
1398 otype
= sp
->sp_type
;
1399 sp
->sp_proto
= ifr
->ifr_flags
;
1400 switch (sp
->sp_proto
) {
1402 ifp
->if_flags
|= IFF_LINK0
|IFF_LINK2
;
1403 sp
->sp_type
= SOCK_DGRAM
;
1406 ifp
->if_flags
|= IFF_LINK0
;
1407 ifp
->if_flags
&= ~IFF_LINK2
;
1408 sp
->sp_type
= SOCK_RAW
;
1411 ifp
->if_flags
&= ~IFF_LINK0
;
1412 ifp
->if_flags
|= IFF_LINK2
;
1416 error
= EPROTONOSUPPORT
;
1419 if ((oproto
== sp
->sp_proto
|| sp
->sp_proto
== 0) &&
1420 (otype
== sp
->sp_type
|| sp
->sp_type
== 0))
1422 switch (sp
->sp_proto
) {
1431 ifr
->ifr_flags
= sp
->sp_proto
;
1435 gre_clearconf(sp
, false);
1436 /* set tunnel endpoints and mark interface as up */
1439 sockaddr_copy(sstosa(&sp
->sp_src
),
1440 sizeof(sp
->sp_src
), ifreq_getaddr(cmd
, ifr
));
1443 sockaddr_copy(sstosa(&sp
->sp_dst
),
1444 sizeof(sp
->sp_dst
), ifreq_getaddr(cmd
, ifr
));
1448 if (sockaddr_any(sstosa(&sp
->sp_src
)) == NULL
||
1449 sockaddr_any(sstosa(&sp
->sp_dst
)) == NULL
) {
1453 /* let gre_socreate() check the rest */
1455 GRE_DPRINTF(sc
, "\n");
1456 /* If we're administratively down, or the configuration
1457 * is empty, there's no use creating a socket.
1459 if ((ifp
->if_flags
& IFF_UP
) == 0 || gre_is_nullconf(sp
))
1462 GRE_DPRINTF(sc
, "\n");
1464 error
= gre_socreate(sc
, sp
, &fd
);
1469 GRE_DPRINTF(sc
, "\n");
1471 error
= gre_ssock(ifp
, sp
, fd
);
1473 if (cmd
!= GRESSOCK
) {
1474 GRE_DPRINTF(sc
, "\n");
1476 if (fd_getfile(fd
) != NULL
)
1482 GRE_DPRINTF(sc
, "\n");
1483 ifp
->if_flags
&= ~IFF_RUNNING
;
1489 ifreq_setaddr(cmd
, ifr
, sstosa(&sp
->sp_src
));
1492 ifreq_setaddr(cmd
, ifr
, sstosa(&sp
->sp_dst
));
1495 GRE_DPRINTF(sc
, "\n");
1497 ifp
->if_flags
&= ~IFF_UP
;
1498 gre_clearconf(sp
, false);
1501 GRE_DPRINTF(sc
, "\n");
1502 gre_clearconf(sp
, true);
1503 fd
= (int)ifr
->ifr_value
;
1504 sp
->sp_bysock
= true;
1505 ifp
->if_flags
|= IFF_UP
;
1507 case SIOCSLIFPHYADDR
:
1508 GRE_DPRINTF(sc
, "\n");
1509 if (lifr
->addr
.ss_family
!= lifr
->dstaddr
.ss_family
) {
1510 error
= EAFNOSUPPORT
;
1513 sockaddr_copy(sstosa(&sp
->sp_src
), sizeof(sp
->sp_src
),
1514 sstosa(&lifr
->addr
));
1515 sockaddr_copy(sstosa(&sp
->sp_dst
), sizeof(sp
->sp_dst
),
1516 sstosa(&lifr
->dstaddr
));
1517 GRE_DPRINTF(sc
, "\n");
1519 case SIOCDIFPHYADDR
:
1520 GRE_DPRINTF(sc
, "\n");
1521 gre_clearconf(sp
, true);
1522 ifp
->if_flags
&= ~IFF_UP
;
1524 case SIOCGLIFPHYADDR
:
1525 GRE_DPRINTF(sc
, "\n");
1526 if (gre_is_nullconf(sp
)) {
1527 error
= EADDRNOTAVAIL
;
1530 sockaddr_copy(sstosa(&lifr
->addr
), sizeof(lifr
->addr
),
1531 sstosa(&sp
->sp_src
));
1532 sockaddr_copy(sstosa(&lifr
->dstaddr
), sizeof(lifr
->dstaddr
),
1533 sstosa(&sp
->sp_dst
));
1534 GRE_DPRINTF(sc
, "\n");
1537 error
= ifioctl_common(ifp
, cmd
, data
);
1541 GRE_DPRINTF(sc
, "\n");
1543 gre_ioctl_unlock(sc
);
/* greattach: pseudo-device attach hook; registers the gre interface cloner
 * with if_clone_attach().  The count argument is not used in the visible
 * code.
 */
1547 void greattach(int);
1551 greattach(int count
)
1553 if_clone_attach(&gre_cloner
);