1 /* $NetBSD: linux_socket.c,v 1.106 2009/11/13 22:39:35 joerg Exp $ */
4 * Copyright (c) 1995, 1998, 2008 The NetBSD Foundation, Inc.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Frank van der Linden and Eric Haszlakiewicz.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
33 * Functions in multiarch:
34 * linux_sys_socketcall : linux_socketcall.c
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: linux_socket.c,v 1.106 2009/11/13 22:39:35 joerg Exp $");
40 #if defined(_KERNEL_OPT)
42 #endif /* defined(_KERNEL_OPT) */
44 #include <sys/param.h>
45 #include <sys/kernel.h>
46 #include <sys/systm.h>
48 #include <sys/malloc.h>
49 #include <sys/ioctl.h>
52 #include <sys/filedesc.h>
53 #include <sys/select.h>
54 #include <sys/socket.h>
55 #include <sys/socketvar.h>
56 #include <sys/domain.h>
58 #include <net/if_dl.h>
59 #include <net/if_types.h>
60 #include <netinet/in.h>
61 #include <netinet/tcp.h>
62 #include <sys/mount.h>
64 #include <sys/vnode.h>
65 #include <sys/device.h>
66 #include <sys/protosw.h>
68 #include <sys/syslog.h>
70 #include <sys/kauth.h>
71 #include <sys/syscallargs.h>
72 #include <sys/ktrace.h>
74 #include <lib/libkern/libkern.h>
76 #include <netinet/ip6.h>
77 #include <netinet6/ip6_var.h>
79 #include <compat/sys/socket.h>
80 #include <compat/sys/sockio.h>
82 #include <compat/linux/common/linux_types.h>
83 #include <compat/linux/common/linux_util.h>
84 #include <compat/linux/common/linux_signal.h>
85 #include <compat/linux/common/linux_ioctl.h>
86 #include <compat/linux/common/linux_socket.h>
87 #if !defined(__alpha__) && !defined(__amd64__)
88 #include <compat/linux/common/linux_socketcall.h>
90 #include <compat/linux/common/linux_sockio.h>
91 #include <compat/linux/common/linux_ipc.h>
92 #include <compat/linux/common/linux_sem.h>
94 #include <compat/linux/linux_syscallargs.h>
97 #define DPRINTF(a) uprintf a
103 * The calls in this file are entered either via the linux_socketcall()
104 * interface or, on the Alpha, as individual syscalls. The
105 * linux_socketcall function does any massaging of arguments so that all
106 * the calls in here need not think that they are anything other
107 * than a normal syscall.
110 static int linux_to_bsd_domain(int);
111 static int bsd_to_linux_domain(int);
112 int linux_to_bsd_sopt_level(int);
113 int linux_to_bsd_so_sockopt(int);
114 int linux_to_bsd_ip_sockopt(int);
115 int linux_to_bsd_tcp_sockopt(int);
116 int linux_to_bsd_udp_sockopt(int);
117 int linux_getifname(struct lwp
*, register_t
*, void *);
118 int linux_getifconf(struct lwp
*, register_t
*, void *);
119 int linux_getifhwaddr(struct lwp
*, register_t
*, u_int
, void *);
120 static int linux_get_sa(struct lwp
*, int, struct mbuf
**,
121 const struct osockaddr
*, unsigned int);
122 static int linux_sa_put(struct osockaddr
*osa
);
123 static int linux_to_bsd_msg_flags(int);
124 static int bsd_to_linux_msg_flags(int);
125 static void linux_to_bsd_msghdr(struct linux_msghdr
*, struct msghdr
*);
126 static void bsd_to_linux_msghdr(struct msghdr
*, struct linux_msghdr
*);
128 static const int linux_to_bsd_domain_
[LINUX_AF_MAX
] = {
132 AF_CCITT
, /* LINUX_AF_AX25 */
135 -1, /* LINUX_AF_NETROM */
136 -1, /* LINUX_AF_BRIDGE */
137 -1, /* LINUX_AF_ATMPVC */
138 AF_CCITT
, /* LINUX_AF_X25 */
140 -1, /* LINUX_AF_ROSE */
142 -1, /* LINUX_AF_NETBEUI */
143 -1, /* LINUX_AF_SECURITY */
145 AF_ROUTE
, /* LINUX_AF_NETLINK */
146 -1, /* LINUX_AF_PACKET */
147 -1, /* LINUX_AF_ASH */
148 -1, /* LINUX_AF_ECONET */
149 -1, /* LINUX_AF_ATMSVC */
151 /* rest up to LINUX_AF_MAX-1 is not allocated */
152 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
155 static const int bsd_to_linux_domain_
[AF_MAX
] = {
166 LINUX_AF_AX25
, /* AF_CCITT */
178 -1, /* pseudo_AF_RTIP */
181 -1, /* pseudo_AF_PIP */
186 -1, /* pseudo_AF_HDRCMPLT */
189 static const struct {
192 } bsd_to_linux_msg_flags_
[] = {
193 {MSG_OOB
, LINUX_MSG_OOB
},
194 {MSG_PEEK
, LINUX_MSG_PEEK
},
195 {MSG_DONTROUTE
, LINUX_MSG_DONTROUTE
},
196 {MSG_EOR
, LINUX_MSG_EOR
},
197 {MSG_TRUNC
, LINUX_MSG_TRUNC
},
198 {MSG_CTRUNC
, LINUX_MSG_CTRUNC
},
199 {MSG_WAITALL
, LINUX_MSG_WAITALL
},
200 {MSG_DONTWAIT
, LINUX_MSG_DONTWAIT
},
201 {MSG_BCAST
, 0}, /* not supported, clear */
202 {MSG_MCAST
, 0}, /* not supported, clear */
203 {-1, /* not supp */ LINUX_MSG_PROBE
},
204 {-1, /* not supp */ LINUX_MSG_FIN
},
205 {-1, /* not supp */ LINUX_MSG_SYN
},
206 {-1, /* not supp */ LINUX_MSG_CONFIRM
},
207 {-1, /* not supp */ LINUX_MSG_RST
},
208 {-1, /* not supp */ LINUX_MSG_ERRQUEUE
},
209 {-1, /* not supp */ LINUX_MSG_NOSIGNAL
},
210 {-1, /* not supp */ LINUX_MSG_MORE
},
214 * Convert between Linux and BSD socket domain values
217 linux_to_bsd_domain(int ldom
)
219 if (ldom
< 0 || ldom
>= LINUX_AF_MAX
)
222 return linux_to_bsd_domain_
[ldom
];
226 * Convert between BSD and Linux socket domain values
229 bsd_to_linux_domain(int bdom
)
231 if (bdom
< 0 || bdom
>= AF_MAX
)
234 return bsd_to_linux_domain_
[bdom
];
238 linux_to_bsd_msg_flags(int lflag
)
246 for(i
= 0; i
< __arraycount(bsd_to_linux_msg_flags_
); i
++) {
247 bfl
= bsd_to_linux_msg_flags_
[i
].bfl
;
248 lfl
= bsd_to_linux_msg_flags_
[i
].lfl
;
265 bsd_to_linux_msg_flags(int bflag
)
273 for(i
= 0; i
< __arraycount(bsd_to_linux_msg_flags_
); i
++) {
274 bfl
= bsd_to_linux_msg_flags_
[i
].bfl
;
275 lfl
= bsd_to_linux_msg_flags_
[i
].lfl
;
292 linux_sys_socket(struct lwp
*l
, const struct linux_sys_socket_args
*uap
, register_t
*retval
)
295 syscallarg(int) domain;
296 syscallarg(int) type;
297 syscallarg(int) protocol;
299 struct sys___socket30_args bsa
;
302 SCARG(&bsa
, protocol
) = SCARG(uap
, protocol
);
303 SCARG(&bsa
, type
) = SCARG(uap
, type
);
304 SCARG(&bsa
, domain
) = linux_to_bsd_domain(SCARG(uap
, domain
));
305 if (SCARG(&bsa
, domain
) == -1)
307 error
= sys___socket30(l
, &bsa
, retval
);
311 * Linux AF_INET6 socket has IPV6_V6ONLY setsockopt set to 0 by
312 * default and some apps depend on this. So, set V6ONLY to 0
313 * for Linux apps if the sysctl value is set to 1.
315 if (!error
&& ip6_v6only
&& SCARG(&bsa
, domain
) == PF_INET6
) {
318 if (fd_getsock(*retval
, &so
) == 0) {
322 (void)so_setsockopt(l
, so
, IPPROTO_IPV6
, IPV6_V6ONLY
,
334 linux_sys_socketpair(struct lwp
*l
, const struct linux_sys_socketpair_args
*uap
, register_t
*retval
)
337 syscallarg(int) domain;
338 syscallarg(int) type;
339 syscallarg(int) protocol;
340 syscallarg(int *) rsv;
342 struct sys_socketpair_args bsa
;
344 SCARG(&bsa
, domain
) = linux_to_bsd_domain(SCARG(uap
, domain
));
345 if (SCARG(&bsa
, domain
) == -1)
347 SCARG(&bsa
, type
) = SCARG(uap
, type
);
348 SCARG(&bsa
, protocol
) = SCARG(uap
, protocol
);
349 SCARG(&bsa
, rsv
) = SCARG(uap
, rsv
);
351 return sys_socketpair(l
, &bsa
, retval
);
355 linux_sys_sendto(struct lwp
*l
, const struct linux_sys_sendto_args
*uap
, register_t
*retval
)
359 syscallarg(void *) msg;
361 syscallarg(int) flags;
362 syscallarg(struct osockaddr *) to;
363 syscallarg(int) tolen;
371 /* Translate message flags. */
372 bflags
= linux_to_bsd_msg_flags(SCARG(uap
, flags
));
374 /* Some unsupported flag */
379 msg
.msg_control
= NULL
;
381 if (SCARG(uap
, tolen
)) {
382 /* Read in and convert the sockaddr */
383 error
= linux_get_sa(l
, SCARG(uap
, s
), &nam
, SCARG(uap
, to
),
387 msg
.msg_flags
|= MSG_NAMEMBUF
;
389 msg
.msg_namelen
= SCARG(uap
, tolen
);
394 aiov
.iov_base
= __UNCONST(SCARG(uap
, msg
));
395 aiov
.iov_len
= SCARG(uap
, len
);
397 return do_sys_sendmsg(l
, SCARG(uap
, s
), &msg
, bflags
, retval
);
401 linux_to_bsd_msghdr(struct linux_msghdr
*lmsg
, struct msghdr
*bmsg
)
403 bmsg
->msg_name
= lmsg
->msg_name
;
404 bmsg
->msg_namelen
= lmsg
->msg_namelen
;
405 bmsg
->msg_iov
= lmsg
->msg_iov
;
406 bmsg
->msg_iovlen
= lmsg
->msg_iovlen
;
407 bmsg
->msg_control
= lmsg
->msg_control
;
408 bmsg
->msg_controllen
= lmsg
->msg_controllen
;
409 bmsg
->msg_flags
= lmsg
->msg_flags
;
413 bsd_to_linux_msghdr(struct msghdr
*bmsg
, struct linux_msghdr
*lmsg
)
415 lmsg
->msg_name
= bmsg
->msg_name
;
416 lmsg
->msg_namelen
= bmsg
->msg_namelen
;
417 lmsg
->msg_iov
= bmsg
->msg_iov
;
418 lmsg
->msg_iovlen
= bmsg
->msg_iovlen
;
419 lmsg
->msg_control
= bmsg
->msg_control
;
420 lmsg
->msg_controllen
= bmsg
->msg_controllen
;
421 lmsg
->msg_flags
= bmsg
->msg_flags
;
425 linux_sys_sendmsg(struct lwp
*l
, const struct linux_sys_sendmsg_args
*uap
, register_t
*retval
)
429 syscallarg(struct linux_msghdr *) msg;
430 syscallarg(u_int) flags;
433 struct linux_msghdr lmsg
;
438 struct mbuf
*ctl_mbuf
= NULL
;
440 error
= copyin(SCARG(uap
, msg
), &lmsg
, sizeof(lmsg
));
443 linux_to_bsd_msghdr(&lmsg
, &msg
);
445 msg
.msg_flags
= MSG_IOVUSRSPACE
;
448 * Translate message flags.
450 bflags
= linux_to_bsd_msg_flags(SCARG(uap
, flags
));
452 /* Some unsupported flag */
456 /* Read in and convert the sockaddr */
457 error
= linux_get_sa(l
, SCARG(uap
, s
), &nam
, msg
.msg_name
,
461 msg
.msg_flags
|= MSG_NAMEMBUF
;
466 * Handle cmsg if there is any.
468 if (LINUX_CMSG_FIRSTHDR(&lmsg
)) {
469 struct linux_cmsghdr l_cmsg
, *l_cc
;
470 struct cmsghdr
*cmsg
;
471 ssize_t resid
= msg
.msg_controllen
;
472 size_t clen
, cidx
= 0, cspace
;
474 ctl_mbuf
= m_get(M_WAIT
, MT_CONTROL
);
476 control
= mtod(ctl_mbuf
, void *);
478 l_cc
= LINUX_CMSG_FIRSTHDR(&lmsg
);
480 error
= copyin(l_cc
, &l_cmsg
, sizeof(l_cmsg
));
485 * Sanity check the control message length.
487 if (l_cmsg
.cmsg_len
> resid
488 || l_cmsg
.cmsg_len
< sizeof l_cmsg
) {
494 * Refuse unsupported control messages, and
495 * translate fields as appropriate.
497 switch (l_cmsg
.cmsg_level
) {
498 case LINUX_SOL_SOCKET
:
499 /* It only differs on some archs */
500 if (LINUX_SOL_SOCKET
!= SOL_SOCKET
)
501 l_cmsg
.cmsg_level
= SOL_SOCKET
;
503 switch(l_cmsg
.cmsg_type
) {
504 case LINUX_SCM_RIGHTS
:
505 /* Linux SCM_RIGHTS is same as NetBSD */
509 /* other types not supported */
515 /* pray and leave intact */
519 cspace
= CMSG_SPACE(l_cmsg
.cmsg_len
- sizeof(l_cmsg
));
521 /* Check the buffer is big enough */
522 if (__predict_false(cidx
+ cspace
> clen
)) {
525 clen
= cidx
+ cspace
;
526 if (clen
>= PAGE_SIZE
) {
530 nc
= realloc(clen
<= MLEN
? NULL
: control
,
531 clen
, M_TEMP
, M_WAITOK
);
537 /* Old buffer was in mbuf... */
538 memcpy(nc
, control
, cidx
);
543 cmsg
= (void *)&control
[cidx
];
544 cmsg
->cmsg_len
= l_cmsg
.cmsg_len
+ LINUX_CMSG_ALIGN_DELTA
;
545 cmsg
->cmsg_level
= l_cmsg
.cmsg_level
;
546 cmsg
->cmsg_type
= l_cmsg
.cmsg_type
;
548 /* Zero area between header and data */
550 CMSG_ALIGN(sizeof(cmsg
)) - sizeof(cmsg
));
552 /* Copyin the data */
553 error
= copyin(LINUX_CMSG_DATA(l_cc
),
555 l_cmsg
.cmsg_len
- sizeof(l_cmsg
));
559 resid
-= LINUX_CMSG_ALIGN(l_cmsg
.cmsg_len
);
561 } while ((l_cc
= LINUX_CMSG_NXTHDR(&msg
, l_cc
)) && resid
> 0);
563 /* If we allocated a buffer, attach to mbuf */
565 MEXTADD(ctl_mbuf
, control
, clen
, M_MBUF
, NULL
, NULL
);
566 ctl_mbuf
->m_flags
|= M_EXT_RW
;
569 ctl_mbuf
->m_len
= cidx
;
571 msg
.msg_control
= ctl_mbuf
;
572 msg
.msg_flags
|= MSG_CONTROLMBUF
;
574 ktrkuser("msgcontrol", mtod(ctl_mbuf
, void *),
578 error
= do_sys_sendmsg(l
, SCARG(uap
, s
), &msg
, bflags
, retval
);
579 /* Freed internally */
583 if (ctl_mbuf
!= NULL
) {
584 if (control
!= NULL
&& control
!= mtod(ctl_mbuf
, void *))
585 free(control
, M_MBUF
);
592 linux_sys_recvfrom(struct lwp
*l
, const struct linux_sys_recvfrom_args
*uap
, register_t
*retval
)
596 syscallarg(void *) buf;
598 syscallarg(int) flags;
599 syscallarg(struct osockaddr *) from;
600 syscallarg(int *) fromlenaddr;
603 struct sys_recvfrom_args bra
;
605 SCARG(&bra
, s
) = SCARG(uap
, s
);
606 SCARG(&bra
, buf
) = SCARG(uap
, buf
);
607 SCARG(&bra
, len
) = SCARG(uap
, len
);
608 SCARG(&bra
, flags
) = SCARG(uap
, flags
);
609 SCARG(&bra
, from
) = (struct sockaddr
*) SCARG(uap
, from
);
610 SCARG(&bra
, fromlenaddr
) = (socklen_t
*)SCARG(uap
, fromlenaddr
);
612 if ((error
= sys_recvfrom(l
, &bra
, retval
)))
615 if (SCARG(uap
, from
) && (error
= linux_sa_put(SCARG(uap
, from
))))
622 linux_copyout_msg_control(struct lwp
*l
, struct msghdr
*mp
, struct mbuf
*control
)
625 struct cmsghdr
*cmsg
;
626 struct linux_cmsghdr linux_cmsg
;
630 if (mp
->msg_controllen
<= 0 || control
== 0) {
631 mp
->msg_controllen
= 0;
632 free_control_mbuf(l
, control
, control
);
636 ktrkuser("msgcontrol", mtod(control
, void *), mp
->msg_controllen
);
638 q
= (char *)mp
->msg_control
;
639 q_end
= q
+ mp
->msg_controllen
;
641 for (m
= control
; m
!= NULL
; ) {
642 cmsg
= mtod(m
, struct cmsghdr
*);
645 * Fixup cmsg. We handle three things:
646 * 0. different sizeof cmsg_len.
647 * 1. different values for level/type on some archs
648 * 2. different alignment of CMSG_DATA on some archs
650 linux_cmsg
.cmsg_len
= cmsg
->cmsg_len
- LINUX_CMSG_ALIGN_DELTA
;
651 linux_cmsg
.cmsg_level
= cmsg
->cmsg_level
;
652 linux_cmsg
.cmsg_type
= cmsg
->cmsg_type
;
655 if (linux_cmsg
.cmsg_len
> dlen
) {
656 /* Not enough room for the parameter */
657 dlen
-= sizeof linux_cmsg
;
659 /* Discard if header won't fit */
661 mp
->msg_flags
|= MSG_CTRUNC
;
662 if (linux_cmsg
.cmsg_level
== SOL_SOCKET
663 && linux_cmsg
.cmsg_type
== SCM_RIGHTS
)
664 /* Do not truncate me ... */
667 dlen
= linux_cmsg
.cmsg_len
- sizeof linux_cmsg
;
669 switch (linux_cmsg
.cmsg_level
) {
671 linux_cmsg
.cmsg_level
= LINUX_SOL_SOCKET
;
672 switch (linux_cmsg
.cmsg_type
) {
674 /* Linux SCM_RIGHTS is same as NetBSD */
678 /* other types not supported */
682 /* machine dependent! */
685 /* pray and leave intact */
689 /* There can be padding between the header and data... */
690 error
= copyout(&linux_cmsg
, q
, sizeof linux_cmsg
);
692 error
= copyout(CCMSG_DATA(cmsg
), q
+ sizeof linux_cmsg
,
696 /* We must free all the SCM_RIGHTS */
701 if (m
== NULL
|| q
+ LINUX_CMSG_SPACE(dlen
) > q_end
) {
702 q
+= LINUX_CMSG_LEN(dlen
);
705 q
+= LINUX_CMSG_SPACE(dlen
);
709 free_control_mbuf(l
, control
, m
);
711 mp
->msg_controllen
= q
- (char *)mp
->msg_control
;
716 linux_sys_recvmsg(struct lwp
*l
, const struct linux_sys_recvmsg_args
*uap
, register_t
*retval
)
720 syscallarg(struct linux_msghdr *) msg;
721 syscallarg(u_int) flags;
724 struct linux_msghdr lmsg
;
726 struct mbuf
*from
, *control
;
728 error
= copyin(SCARG(uap
, msg
), &lmsg
, sizeof(lmsg
));
731 linux_to_bsd_msghdr(&lmsg
, &msg
);
733 msg
.msg_flags
= linux_to_bsd_msg_flags(SCARG(uap
, flags
));
734 if (msg
.msg_flags
< 0) {
735 /* Some unsupported flag */
738 msg
.msg_flags
|= MSG_IOVUSRSPACE
;
740 error
= do_sys_recvmsg(l
, SCARG(uap
, s
), &msg
, &from
,
741 msg
.msg_control
!= NULL
? &control
: NULL
, retval
);
745 if (msg
.msg_control
!= NULL
)
746 error
= linux_copyout_msg_control(l
, &msg
, control
);
748 if (error
== 0 && from
!= 0) {
749 mtod(from
, struct osockaddr
*)->sa_family
=
750 bsd_to_linux_domain(mtod(from
, struct sockaddr
*)->sa_family
);
751 error
= copyout_sockname(msg
.msg_name
, &msg
.msg_namelen
, 0,
760 msg
.msg_flags
= bsd_to_linux_msg_flags(msg
.msg_flags
);
761 if (msg
.msg_flags
< 0)
762 /* Some flag unsupported by Linux */
765 ktrkuser("msghdr", &msg
, sizeof(msg
));
766 bsd_to_linux_msghdr(&msg
, &lmsg
);
767 error
= copyout(&lmsg
, SCARG(uap
, msg
), sizeof(lmsg
));
775 * Convert socket option level from Linux to NetBSD value. Only SOL_SOCKET
776 * is different, the rest matches IPPROTO_* on both systems.
779 linux_to_bsd_sopt_level(int llevel
)
783 case LINUX_SOL_SOCKET
:
797 * Convert Linux socket level socket option numbers to NetBSD values.
800 linux_to_bsd_so_sockopt(int lopt
)
806 case LINUX_SO_REUSEADDR
:
808 * Linux does not implement SO_REUSEPORT, but allows reuse of a
809 * host:port pair through SO_REUSEADDR even if the address is not a
810 * multicast-address. Effectively, this means that we should use
811 * SO_REUSEPORT to allow Linux applications to not exit with
819 case LINUX_SO_DONTROUTE
:
821 case LINUX_SO_BROADCAST
:
823 case LINUX_SO_SNDBUF
:
825 case LINUX_SO_RCVBUF
:
827 case LINUX_SO_KEEPALIVE
:
829 case LINUX_SO_OOBINLINE
:
831 case LINUX_SO_LINGER
:
833 case LINUX_SO_PRIORITY
:
834 case LINUX_SO_NO_CHECK
:
841 * Convert Linux IP level socket option number to NetBSD values.
844 linux_to_bsd_ip_sockopt(int lopt
)
852 case LINUX_IP_HDRINCL
:
854 case LINUX_IP_MULTICAST_TTL
:
855 return IP_MULTICAST_TTL
;
856 case LINUX_IP_MULTICAST_LOOP
:
857 return IP_MULTICAST_LOOP
;
858 case LINUX_IP_MULTICAST_IF
:
859 return IP_MULTICAST_IF
;
860 case LINUX_IP_ADD_MEMBERSHIP
:
861 return IP_ADD_MEMBERSHIP
;
862 case LINUX_IP_DROP_MEMBERSHIP
:
863 return IP_DROP_MEMBERSHIP
;
870 * Convert Linux TCP level socket option number to NetBSD values.
873 linux_to_bsd_tcp_sockopt(int lopt
)
877 case LINUX_TCP_NODELAY
:
879 case LINUX_TCP_MAXSEG
:
887 * Convert Linux UDP level socket option number to NetBSD values.
890 linux_to_bsd_udp_sockopt(int lopt
)
900 * Another reasonably straightforward function: setsockopt(2).
901 * The level and option numbers are converted; the values passed
902 * are not (yet) converted, the ones currently implemented don't
903 * need conversion, as they are the same on both systems.
906 linux_sys_setsockopt(struct lwp
*l
, const struct linux_sys_setsockopt_args
*uap
, register_t
*retval
)
910 syscallarg(int) level;
911 syscallarg(int) optname;
912 syscallarg(void *) optval;
913 syscallarg(int) optlen;
915 struct sys_setsockopt_args bsa
;
918 SCARG(&bsa
, s
) = SCARG(uap
, s
);
919 SCARG(&bsa
, level
) = linux_to_bsd_sopt_level(SCARG(uap
, level
));
920 SCARG(&bsa
, val
) = SCARG(uap
, optval
);
921 SCARG(&bsa
, valsize
) = SCARG(uap
, optlen
);
924 * Linux supports only SOL_SOCKET for AF_LOCAL domain sockets
925 * and returns EOPNOTSUPP for other levels
927 if (SCARG(&bsa
, level
) != SOL_SOCKET
) {
931 /* fd_getsock() will use the descriptor for us */
932 if ((error
= fd_getsock(SCARG(&bsa
, s
), &so
)) != 0)
934 family
= so
->so_proto
->pr_domain
->dom_family
;
935 fd_putfile(SCARG(&bsa
, s
));
937 if (family
== AF_LOCAL
)
941 switch (SCARG(&bsa
, level
)) {
943 name
= linux_to_bsd_so_sockopt(SCARG(uap
, optname
));
946 name
= linux_to_bsd_ip_sockopt(SCARG(uap
, optname
));
949 name
= linux_to_bsd_tcp_sockopt(SCARG(uap
, optname
));
952 name
= linux_to_bsd_udp_sockopt(SCARG(uap
, optname
));
960 SCARG(&bsa
, name
) = name
;
962 return sys_setsockopt(l
, &bsa
, retval
);
966 * getsockopt(2) is very much the same as setsockopt(2) (see above)
969 linux_sys_getsockopt(struct lwp
*l
, const struct linux_sys_getsockopt_args
*uap
, register_t
*retval
)
973 syscallarg(int) level;
974 syscallarg(int) optname;
975 syscallarg(void *) optval;
976 syscallarg(int *) optlen;
978 struct sys_getsockopt_args bga
;
981 SCARG(&bga
, s
) = SCARG(uap
, s
);
982 SCARG(&bga
, level
) = linux_to_bsd_sopt_level(SCARG(uap
, level
));
983 SCARG(&bga
, val
) = SCARG(uap
, optval
);
984 SCARG(&bga
, avalsize
) = (socklen_t
*)SCARG(uap
, optlen
);
986 switch (SCARG(&bga
, level
)) {
988 name
= linux_to_bsd_so_sockopt(SCARG(uap
, optname
));
991 name
= linux_to_bsd_ip_sockopt(SCARG(uap
, optname
));
994 name
= linux_to_bsd_tcp_sockopt(SCARG(uap
, optname
));
997 name
= linux_to_bsd_udp_sockopt(SCARG(uap
, optname
));
1005 SCARG(&bga
, name
) = name
;
1007 return sys_getsockopt(l
, &bga
, retval
);
1011 linux_getifname(struct lwp
*l
, register_t
*retval
, void *data
)
1014 struct linux_ifreq ifr
;
1017 error
= copyin(data
, &ifr
, sizeof(ifr
));
1021 if (ifr
.ifr_ifru
.ifru_ifindex
>= if_indexlim
)
1024 ifp
= ifindex2ifnet
[ifr
.ifr_ifru
.ifru_ifindex
];
1028 strncpy(ifr
.ifr_name
, ifp
->if_xname
, sizeof(ifr
.ifr_name
));
1030 return copyout(&ifr
, data
, sizeof(ifr
));
1034 linux_getifconf(struct lwp
*l
, register_t
*retval
, void *data
)
1036 struct linux_ifreq ifr
, *ifrp
;
1037 struct ifconf
*ifc
= data
;
1040 struct sockaddr
*sa
;
1041 struct osockaddr
*osa
;
1042 int space
, error
= 0;
1043 const int sz
= (int)sizeof(ifr
);
1045 ifrp
= (struct linux_ifreq
*)ifc
->ifc_req
;
1049 space
= ifc
->ifc_len
;
1051 IFNET_FOREACH(ifp
) {
1052 (void)strncpy(ifr
.ifr_name
, ifp
->if_xname
,
1053 sizeof(ifr
.ifr_name
));
1054 if (ifr
.ifr_name
[sizeof(ifr
.ifr_name
) - 1] != '\0')
1055 return ENAMETOOLONG
;
1056 if (IFADDR_EMPTY(ifp
))
1058 IFADDR_FOREACH(ifa
, ifp
) {
1060 if (sa
->sa_family
!= AF_INET
||
1061 sa
->sa_len
> sizeof(*osa
))
1063 memcpy(&ifr
.ifr_addr
, sa
, sa
->sa_len
);
1064 osa
= (struct osockaddr
*)&ifr
.ifr_addr
;
1065 osa
->sa_family
= sa
->sa_family
;
1067 error
= copyout(&ifr
, ifrp
, sz
);
1077 ifc
->ifc_len
-= space
;
1079 ifc
->ifc_len
= -space
;
1085 linux_getifhwaddr(struct lwp
*l
, register_t
*retval
, u_int fd
,
1088 /* Not the full structure, just enough to map what we do here */
1089 struct linux_ifreq lreq
;
1093 struct sockaddr_dl
*sadl
;
1098 * We can't emulate this ioctl by calling sys_ioctl() to run
1099 * SIOCGIFCONF, because the user buffer is not of the right
1100 * type to take those results. We can't use kernel buffers to
1101 * receive the results, as the implementation of sys_ioctl()
1102 * and ifconf() [which implements SIOCGIFCONF] use
1103 * copyin()/copyout() which will fail on kernel addresses.
1105 * So, we must duplicate code from sys_ioctl() and ifconf(). Ugh.
1108 if ((fp
= fd_getfile(fd
)) == NULL
)
1111 KERNEL_LOCK(1, NULL
);
1113 if ((fp
->f_flag
& (FREAD
| FWRITE
)) == 0) {
1118 error
= copyin(data
, &lreq
, sizeof(lreq
));
1121 lreq
.ifr_name
[LINUX_IFNAMSIZ
-1] = '\0'; /* just in case */
1124 * Try real interface name first, then fake "ethX"
1127 IFNET_FOREACH(ifp
) {
1130 if (strcmp(lreq
.ifr_name
, ifp
->if_xname
))
1131 /* not this interface */
1134 if (IFADDR_EMPTY(ifp
)) {
1138 IFADDR_FOREACH(ifa
, ifp
) {
1139 sadl
= satosdl(ifa
->ifa_addr
);
1140 /* only return ethernet addresses */
1141 /* XXX what about FDDI, etc. ? */
1142 if (sadl
->sdl_family
!= AF_LINK
||
1143 sadl
->sdl_type
!= IFT_ETHER
)
1145 memcpy(&lreq
.ifr_hwaddr
.sa_data
, CLLADDR(sadl
),
1147 sizeof(lreq
.ifr_hwaddr
.sa_data
)));
1148 lreq
.ifr_hwaddr
.sa_family
=
1150 error
= copyout(&lreq
, data
, sizeof(lreq
));
1155 if (strncmp(lreq
.ifr_name
, "eth", 3) == 0) {
1156 for (ifnum
= 0, index
= 3;
1157 lreq
.ifr_name
[index
] != '\0' && index
< LINUX_IFNAMSIZ
;
1160 ifnum
+= lreq
.ifr_name
[index
] - '0';
1163 error
= EINVAL
; /* in case we don't find one */
1165 IFNET_FOREACH(ifp
) {
1168 memcpy(lreq
.ifr_name
, ifp
->if_xname
,
1169 MIN(LINUX_IFNAMSIZ
, IFNAMSIZ
));
1170 IFADDR_FOREACH(ifa
, ifp
) {
1171 sadl
= satosdl(ifa
->ifa_addr
);
1172 /* only return ethernet addresses */
1173 /* XXX what about FDDI, etc. ? */
1174 if (sadl
->sdl_family
!= AF_LINK
||
1175 sadl
->sdl_type
!= IFT_ETHER
)
1178 /* not the requested iface */
1180 memcpy(&lreq
.ifr_hwaddr
.sa_data
,
1183 sizeof(lreq
.ifr_hwaddr
.sa_data
)));
1184 lreq
.ifr_hwaddr
.sa_family
=
1186 error
= copyout(&lreq
, data
, sizeof(lreq
));
1192 /* unknown interface, not even an "eth*" name */
1197 KERNEL_UNLOCK_ONE(NULL
);
1203 linux_ioctl_socket(struct lwp
*l
, const struct linux_sys_ioctl_args
*uap
, register_t
*retval
)
1207 syscallarg(u_long) com;
1208 syscallarg(void *) data;
1211 int error
= 0, isdev
= 0, dosys
= 1;
1212 struct sys_ioctl_args ia
;
1215 int (*ioctlf
)(file_t
*, u_long
, void *);
1218 if ((fp
= fd_getfile(SCARG(uap
, fd
))) == NULL
)
1221 if (fp
->f_type
== DTYPE_VNODE
) {
1222 vp
= (struct vnode
*)fp
->f_data
;
1223 isdev
= vp
->v_type
== VCHR
;
1227 * Don't try to interpret socket ioctl calls that are done
1228 * on a device filedescriptor, just pass them through, to
1229 * emulate Linux behaviour. Use PTIOCLINUX so that the
1230 * device will only handle these if it's prepared to do
1231 * so, to avoid unexpected things from happening.
1235 ioctlf
= fp
->f_ops
->fo_ioctl
;
1236 pt
.com
= SCARG(uap
, com
);
1237 pt
.data
= SCARG(uap
, data
);
1238 error
= ioctlf(fp
, PTIOCLINUX
, &pt
);
1240 * XXX hack: if the function returns EJUSTRETURN,
1241 * it has stuffed a syscall return value in pt.data.
1243 if (error
== EJUSTRETURN
) {
1244 retval
[0] = (register_t
)pt
.data
;
1250 com
= SCARG(uap
, com
);
1254 case LINUX_SIOCGIFNAME
:
1255 error
= linux_getifname(l
, retval
, SCARG(uap
, data
));
1258 case LINUX_SIOCGIFCONF
:
1259 error
= linux_getifconf(l
, retval
, SCARG(uap
, data
));
1262 case LINUX_SIOCGIFFLAGS
:
1263 SCARG(&ia
, com
) = OSIOCGIFFLAGS
;
1265 case LINUX_SIOCSIFFLAGS
:
1266 SCARG(&ia
, com
) = OSIOCSIFFLAGS
;
1268 case LINUX_SIOCGIFADDR
:
1269 SCARG(&ia
, com
) = OOSIOCGIFADDR
;
1271 case LINUX_SIOCGIFDSTADDR
:
1272 SCARG(&ia
, com
) = OOSIOCGIFDSTADDR
;
1274 case LINUX_SIOCGIFBRDADDR
:
1275 SCARG(&ia
, com
) = OOSIOCGIFBRDADDR
;
1277 case LINUX_SIOCGIFNETMASK
:
1278 SCARG(&ia
, com
) = OOSIOCGIFNETMASK
;
1280 case LINUX_SIOCGIFMTU
:
1281 SCARG(&ia
, com
) = OSIOCGIFMTU
;
1283 case LINUX_SIOCADDMULTI
:
1284 SCARG(&ia
, com
) = OSIOCADDMULTI
;
1286 case LINUX_SIOCDELMULTI
:
1287 SCARG(&ia
, com
) = OSIOCDELMULTI
;
1289 case LINUX_SIOCGIFHWADDR
:
1290 error
= linux_getifhwaddr(l
, retval
, SCARG(uap
, fd
),
1299 fd_putfile(SCARG(uap
, fd
));
1301 if (error
==0 && dosys
) {
1302 SCARG(&ia
, fd
) = SCARG(uap
, fd
);
1303 SCARG(&ia
, data
) = SCARG(uap
, data
);
1304 error
= sys_ioctl(curlwp
, &ia
, retval
);
1311 linux_sys_connect(struct lwp
*l
, const struct linux_sys_connect_args
*uap
, register_t
*retval
)
1315 syscallarg(const struct sockaddr *) name;
1316 syscallarg(int) namelen;
1321 error
= linux_get_sa(l
, SCARG(uap
, s
), &nam
, SCARG(uap
, name
),
1322 SCARG(uap
, namelen
));
1326 error
= do_sys_connect(l
, SCARG(uap
, s
), nam
);
1328 if (error
== EISCONN
) {
1330 int state
, prflags
, nbio
;
1332 /* fd_getsock() will use the descriptor for us */
1333 if (fd_getsock(SCARG(uap
, s
), &so
) != 0)
1337 state
= so
->so_state
;
1339 prflags
= so
->so_proto
->pr_flags
;
1341 fd_putfile(SCARG(uap
, s
));
1343 * We should only let this call succeed once per
1344 * non-blocking connect; however we don't have
1345 * a convenient place to keep that state..
1347 if (nbio
&& (state
& SS_ISCONNECTED
) &&
1348 (prflags
& PR_CONNREQUIRED
))
1356 linux_sys_bind(struct lwp
*l
, const struct linux_sys_bind_args
*uap
, register_t
*retval
)
1360 syscallarg(const struct osockaddr *) name;
1361 syscallarg(int) namelen;
1366 error
= linux_get_sa(l
, SCARG(uap
, s
), &nam
, SCARG(uap
, name
),
1367 SCARG(uap
, namelen
));
1371 return do_sys_bind(l
, SCARG(uap
, s
), nam
);
1375 linux_sys_getsockname(struct lwp
*l
, const struct linux_sys_getsockname_args
*uap
, register_t
*retval
)
1378 syscallarg(int) fdes;
1379 syscallarg(void *) asa;
1380 syscallarg(int *) alen;
1384 if ((error
= sys_getsockname(l
, (const void *)uap
, retval
)) != 0)
1387 if ((error
= linux_sa_put((struct osockaddr
*)SCARG(uap
, asa
))))
1394 linux_sys_getpeername(struct lwp
*l
, const struct linux_sys_getpeername_args
*uap
, register_t
*retval
)
1397 syscallarg(int) fdes;
1398 syscallarg(void *) asa;
1399 syscallarg(int *) alen;
1403 if ((error
= sys_getpeername(l
, (const void *)uap
, retval
)) != 0)
1406 if ((error
= linux_sa_put((struct osockaddr
*)SCARG(uap
, asa
))))
1413 * Copy the osockaddr structure pointed to by osa to mbuf, adjust
1414 * family and convert to sockaddr.
1417 linux_get_sa(struct lwp
*l
, int s
, struct mbuf
**mp
,
1418 const struct osockaddr
*osa
, unsigned int salen
)
1421 struct sockaddr
*sa
;
1422 struct osockaddr
*kosa
;
1425 if (salen
== 1 || salen
> UCHAR_MAX
) {
1426 DPRINTF(("bad osa=%p salen=%d\n", osa
, salen
));
1430 /* We'll need the address in an mbuf later, so copy into one here */
1431 m
= m_get(M_WAIT
, MT_SONAME
);
1433 MEXTMALLOC(m
, salen
, M_WAITOK
);
1442 kosa
= mtod(m
, void *);
1443 if ((error
= copyin(osa
, kosa
, salen
))) {
1444 DPRINTF(("error %d copying osa %p len %d\n",
1445 error
, osa
, salen
));
1449 ktrkuser("linux sockaddr", kosa
, salen
);
1451 bdom
= linux_to_bsd_domain(kosa
->sa_family
);
1453 DPRINTF(("bad linux family=%d\n", kosa
->sa_family
));
1459 * If the family is unspecified, use address family of the socket.
1460 * This avoids triggering strict family checks in netinet/in_pcb.c et al.
1462 if (bdom
== AF_UNSPEC
) {
1465 /* fd_getsock() will use the descriptor for us */
1466 if ((error
= fd_getsock(s
, &so
)) != 0)
1469 bdom
= so
->so_proto
->pr_domain
->dom_family
;
1472 DPRINTF(("AF_UNSPEC family adjusted to %d\n", bdom
));
1476 * Older Linux IPv6 code uses obsolete RFC2133 struct sockaddr_in6,
1477 * which lacks the scope id compared with RFC2553 one. If we detect
1478 * the situation, reject the address and write a message to system log.
1480 * Still accept addresses for which the scope id is not used.
1482 if (bdom
== AF_INET6
&& salen
== sizeof (struct sockaddr_in6
) - sizeof (u_int32_t
)) {
1483 struct sockaddr_in6
*sin6
= (struct sockaddr_in6
*)kosa
;
1484 if (!IN6_IS_ADDR_V4MAPPED(&sin6
->sin6_addr
) &&
1485 (IN6_IS_ADDR_LINKLOCAL(&sin6
->sin6_addr
) ||
1486 IN6_IS_ADDR_SITELOCAL(&sin6
->sin6_addr
) ||
1487 IN6_IS_ADDR_V4COMPAT(&sin6
->sin6_addr
) ||
1488 IN6_IS_ADDR_UNSPECIFIED(&sin6
->sin6_addr
) ||
1489 IN6_IS_ADDR_MULTICAST(&sin6
->sin6_addr
))) {
1490 struct proc
*p
= l
->l_proc
;
1491 int uid
= l
->l_cred
? kauth_cred_geteuid(l
->l_cred
) : -1;
1494 "pid %d (%s), uid %d: obsolete pre-RFC2553 "
1495 "sockaddr_in6 rejected",
1496 p
->p_pid
, p
->p_comm
, uid
);
1500 salen
= sizeof (struct sockaddr_in6
);
1501 sin6
->sin6_scope_id
= 0;
1504 if (bdom
== AF_INET
)
1505 salen
= sizeof(struct sockaddr_in
);
1507 sa
= (struct sockaddr
*) kosa
;
1508 sa
->sa_family
= bdom
;
1511 ktrkuser("new sockaddr", kosa
, salen
);
1514 DPRINTF(("family %d, len = %d [ ", sa
->sa_family
, sa
->sa_len
));
1515 for (bdom
= 0; bdom
< sizeof(sa
->sa_data
); bdom
++)
1516 DPRINTF(("%02x ", (unsigned char) sa
->sa_data
[bdom
]));
1529 linux_sa_put(struct osockaddr
*osa
)
1532 struct osockaddr
*kosa
;
1533 int error
, bdom
, len
;
1536 * Only read/write the sockaddr family and length part, the rest is
1539 len
= sizeof(sa
.sa_len
) + sizeof(sa
.sa_family
);
1541 error
= copyin(osa
, &sa
, len
);
1545 bdom
= bsd_to_linux_domain(sa
.sa_family
);
1549 /* Note: we convert from sockaddr to osockaddr here, too */
1550 kosa
= (struct osockaddr
*) &sa
;
1551 kosa
->sa_family
= bdom
;
1552 error
= copyout(kosa
, osa
, len
);
1561 linux_sys_recv(struct lwp
*l
, const struct linux_sys_recv_args
*uap
, register_t
*retval
)
1565 syscallarg(void *) buf;
1566 syscallarg(int) len;
1567 syscallarg(int) flags;
1569 struct sys_recvfrom_args bra
;
1572 SCARG(&bra
, s
) = SCARG(uap
, s
);
1573 SCARG(&bra
, buf
) = SCARG(uap
, buf
);
1574 SCARG(&bra
, len
) = (size_t) SCARG(uap
, len
);
1575 SCARG(&bra
, flags
) = SCARG(uap
, flags
);
1576 SCARG(&bra
, from
) = NULL
;
1577 SCARG(&bra
, fromlenaddr
) = NULL
;
1579 return (sys_recvfrom(l
, &bra
, retval
));
1583 linux_sys_send(struct lwp
*l
, const struct linux_sys_send_args
*uap
, register_t
*retval
)
1587 syscallarg(void *) buf;
1588 syscallarg(int) len;
1589 syscallarg(int) flags;
1591 struct sys_sendto_args bsa
;
1593 SCARG(&bsa
, s
) = SCARG(uap
, s
);
1594 SCARG(&bsa
, buf
) = SCARG(uap
, buf
);
1595 SCARG(&bsa
, len
) = SCARG(uap
, len
);
1596 SCARG(&bsa
, flags
) = SCARG(uap
, flags
);
1597 SCARG(&bsa
, to
) = NULL
;
1598 SCARG(&bsa
, tolen
) = 0;
1600 return (sys_sendto(l
, &bsa
, retval
));
1605 linux_sys_accept(struct lwp
*l
, const struct linux_sys_accept_args
*uap
, register_t
*retval
)
1609 syscallarg(struct osockaddr *) name;
1610 syscallarg(int *) anamelen;
1613 struct sys_accept_args baa
;
1615 SCARG(&baa
, s
) = SCARG(uap
, s
);
1616 SCARG(&baa
, name
) = (struct sockaddr
*) SCARG(uap
, name
);
1617 SCARG(&baa
, anamelen
) = (unsigned int *) SCARG(uap
, anamelen
);
1619 if ((error
= sys_accept(l
, &baa
, retval
)))
1622 if (SCARG(uap
, name
) && (error
= linux_sa_put(SCARG(uap
, name
))))