1 /* $NetBSD: linux_exec_machdep.c,v 1.13 2009/09/20 10:29:30 taca Exp $ */
4 * Copyright (c) 2004 The NetBSD Foundation, Inc.
7 * This code is derived from software contributed to The NetBSD Foundation
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: linux_exec_machdep.c,v 1.13 2009/09/20 10:29:30 taca Exp $");
35 #if defined(_KERNEL_OPT)
37 #include "opt_user_ldt.h"
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/resource.h>
46 #include <sys/exec_elf.h>
47 #include <sys/vnode.h>
51 #include <machine/vmparam.h>
55 #include <sys/syscallargs.h>
60 #define DPRINTF(a) uprintf a
63 #include <compat/linux/common/linux_types.h>
64 #include <compat/linux/common/linux_signal.h>
65 #include <compat/linux/common/linux_machdep.h>
66 #include <compat/linux/common/linux_util.h>
67 #include <compat/linux/common/linux_ioctl.h>
68 #include <compat/linux/common/linux_hdio.h>
69 #include <compat/linux/common/linux_exec.h>
70 #include <compat/linux/common/linux_errno.h>
71 #include <compat/linux//linux_syscallargs.h>
/*
 * linux_exec_setup_stack(l, epp)
 *
 * Fills in the stack fields of the exec package epp (ep_minsaddr,
 * ep_maxsaddr, ep_ssize) and queues vmcmd_map_zero commands on
 * epp->ep_vmcmds that describe the stack mapping.
 *
 *   l   - lwp whose process RLIMIT_STACK soft limit (rlim_cur) is used
 *         as the initially accessible stack size.
 *   epp - exec package being prepared; ep_flags is tested for EXEC_32.
 *
 * Visible steps:
 *   1. ep_minsaddr is set to USRSTACK32 when EXEC_32 is set; a second
 *      visible assignment uses USRSTACK.  max_stack_size is MAXSSIZ in
 *      both assignments.
 *   2. ep_minsaddr is clamped to at most LINUX_USRSTACK.
 *   3. ep_ssize is taken from the RLIMIT_STACK soft limit.
 *   4. Up to two mappings are queued: a VM_PROT_NONE region of
 *      max_stack_size - ep_ssize bytes (only when that size is nonzero)
 *      and a VM_PROT_READ | VM_PROT_WRITE region of ep_ssize bytes.
 *
 * NOTE(review): this listing has lost several physical lines (the
 * embedded listing numbers skip).  The return type, the braces, the
 * keyword separating the two ep_minsaddr assignments, the condition
 * guarding the uprintf warning, the second argument of the first
 * STACK_GROW call and the tail of the final NEW_VMCMD2 call are not
 * visible here; confirm them against the complete file.
 */
75 linux_exec_setup_stack(struct lwp
*l
, struct exec_package
*epp
)
77 u_long max_stack_size
;
78 u_long access_linear_min
, access_size
;
79 u_long noaccess_linear_min
, noaccess_size
;
82 #define USRSTACK32 (0x00000000ffffffffL&~PGOFSET)
/* Select ep_minsaddr and max_stack_size depending on EXEC_32. */
85 if (epp
->ep_flags
& EXEC_32
) {
86 epp
->ep_minsaddr
= USRSTACK32
;
87 max_stack_size
= MAXSSIZ
;
89 epp
->ep_minsaddr
= USRSTACK
;
90 max_stack_size
= MAXSSIZ
;
/* Never let ep_minsaddr exceed LINUX_USRSTACK. */
93 if (epp
->ep_minsaddr
> LINUX_USRSTACK
)
94 epp
->ep_minsaddr
= LINUX_USRSTACK
;
98 * Someone needs to make KERNBASE and TEXTADDR
99 * java versions < 1.4.2 need the stack to be
102 uprintf("Cannot setup stack to 0xC0000000, "
103 "java will not work properly\n");
106 epp
->ep_maxsaddr
= (u_long
)STACK_GROW(epp
->ep_minsaddr
,
108 epp
->ep_ssize
= l
->l_proc
->p_rlimit
[RLIMIT_STACK
].rlim_cur
;
111 * set up commands for stack. note that this takes *two*, one to
112 * map the part of the stack which we can access, and one to map
113 * the part which we can't.
115 * arguably, it could be made into one, but that would require the
116 * addition of another mapping proc, which is unnecessary
/*
 * access_size is the current soft stack limit; noaccess_size is what
 * remains of max_stack_size beyond it.  The *_linear_min values are
 * computed with STACK_ALLOC from ep_minsaddr.
 */
118 access_size
= epp
->ep_ssize
;
119 access_linear_min
= (u_long
)STACK_ALLOC(epp
->ep_minsaddr
, access_size
);
120 noaccess_size
= max_stack_size
- access_size
;
121 noaccess_linear_min
= (u_long
)STACK_ALLOC(STACK_GROW(epp
->ep_minsaddr
,
122 access_size
), noaccess_size
);
/* Queue the VM_PROT_NONE part only when it is non-empty. */
123 if (noaccess_size
> 0) {
124 NEW_VMCMD2(&epp
->ep_vmcmds
, vmcmd_map_zero
, noaccess_size
,
125 noaccess_linear_min
, NULLVP
, 0, VM_PROT_NONE
, VMCMD_STACK
);
/* The readable and writable part must have a nonzero size. */
127 KASSERT(access_size
> 0);
128 NEW_VMCMD2(&epp
->ep_vmcmds
, vmcmd_map_zero
, access_size
,
129 access_linear_min
, NULLVP
, 0, VM_PROT_READ
| VM_PROT_WRITE
,
/*
 * Inline assembly that moves the 16-bit value of sel into the %gs
 * segment register (movw).  The operand may live in a register or in
 * memory (constraint rm).
 *
 * NOTE(review): the enclosing function header is not visible in this
 * listing; presumably this is the body of load_gs(), which is called
 * later in this file - confirm against the complete file.
 */
140 __asm
__volatile("movw %0,%%gs" : : "rm" ((unsigned short)sel
));
/*
 * linux_init_thread_area(l, l2)
 *
 * Reads a struct linux_user_desc from the user address stored in the
 * tf_esi register of the trapframe of l, converts it into a
 * segment_descriptor, stores that descriptor in the pcb_gsd field of
 * the pcb of l2 and sets tf_gs in the trapframe of l2 to
 * GSEL(GUGS_SEL, SEL_UPL).  The debug message below indicates this is
 * used for clone with CLONE_SETTLS.
 *
 *   l  - lwp whose trapframe supplies the user pointer (tf_esi).
 *   l2 - lwp whose pcb and trapframe receive the new descriptor.
 *
 * When the requested entry_number equals LINUX_GLIBC_TLS_SEL, the
 * entry number is rewritten to GUGS_SEL and the structure is copied
 * back out to the same user address.
 *
 * NOTE(review): this listing has lost several physical lines.  The
 * return type, the braces, the handling of the copyin and copyout
 * error values, the statement that follows the DPRINTF in the index
 * check and the final return are not visible here; confirm them
 * against the complete file.
 */
145 linux_init_thread_area(struct lwp
*l
, struct lwp
*l2
)
147 struct trapframe
*tf
= l
->l_md
.md_regs
, *tf2
= l2
->l_md
.md_regs
;
148 struct pcb
*pcb2
= lwp_getpcb(l2
);
149 struct linux_user_desc info
;
150 struct segment_descriptor sd
;
151 int error
, idx
, a
[2];
/* Fetch the descriptor request from the user address held in tf_esi. */
153 error
= copyin((void *)tf
->tf_esi
, &info
, sizeof(info
));
156 idx
= info
.entry_number
;
159 * looks like we're getting the idx we returned
160 * in the set_thread_area() syscall
162 if (idx
!= LINUX_GLIBC_TLS_SEL
&& idx
!= GUGS_SEL
) {
163 DPRINTF(("resetting idx %d to GUGS_SEL", idx
));
167 /* this doesnt happen in practice */
168 if (idx
== LINUX_GLIBC_TLS_SEL
) {
169 /* we might copy out the entry_number as 3 */
170 info
.entry_number
= GUGS_SEL
;
171 error
= copyout(&info
, (void *)tf
->tf_esi
, sizeof(info
));
/*
 * Build the two descriptor words from info with LINUX_LDT_entry_a and
 * LINUX_LDT_entry_b, then copy them bytewise into sd.
 */
176 a
[0] = LINUX_LDT_entry_a(&info
);
177 a
[1] = LINUX_LDT_entry_b(&info
);
179 (void)memcpy(&sd
, &a
, sizeof(a
));
180 KASSERT(ISMEMSDP((&sd
)));
181 DPRINTF(("Segment created in clone with CLONE_SETTLS: lobase: %x, "
182 "hibase: %x, lolimit: %x, hilimit: %x, type: %i, dpl: %i, p: %i, "
183 "xx: %i, def32: %i, gran: %i\n", sd
.sd_lobase
,
184 sd
.sd_hibase
, sd
.sd_lolimit
, sd
.sd_hilimit
, sd
.sd_type
, sd
.sd_dpl
,
185 sd
.sd_p
, sd
.sd_xx
, sd
.sd_def32
, sd
.sd_gran
));
/* Store the descriptor in the pcb of l2 and set tf_gs of l2. */
187 (void)memcpy(&pcb2
->pcb_gsd
, &sd
, sizeof(sd
));
188 tf2
->tf_gs
= GSEL(GUGS_SEL
, SEL_UPL
);
/*
 * linux_sys_set_thread_area(l, uap, retval)
 *
 * Copies a struct linux_user_desc in from the user pointer
 * SCARG(uap, desc), forces its entry_number to GUGS_SEL, copies the
 * structure back out to the same user pointer, converts it to a
 * segment_descriptor and installs that descriptor in three places:
 * the pcb_gsd field of the pcb of l, slot GUGS_SEL of the ci_gdt
 * array of the current CPU, and the gs register through
 * load_gs(GSEL(GUGS_SEL, SEL_UPL)).
 *
 *   l      - calling lwp; its pcb receives the descriptor.
 *   uap    - syscall arguments; only the desc member is read here.
 *   retval - not referenced in the visible lines.
 *
 * Only entry numbers LINUX_GLIBC_TLS_SEL, -1 and GUGS_SEL pass the
 * index check.
 *
 * NOTE(review): this listing has lost several physical lines.  The
 * return type, the braces, the handling of the copyin and copyout
 * error values, the statement executed when the index check fails,
 * the body of the LINUX_LDT_empty branch and the final return are not
 * visible here; confirm them against the complete file.
 */
195 linux_sys_set_thread_area(struct lwp
*l
,
196 const struct linux_sys_set_thread_area_args
*uap
, register_t
*retval
)
198 struct pcb
*pcb
= lwp_getpcb(l
);
199 struct linux_user_desc info
;
200 struct segment_descriptor sd
;
201 int error
, idx
, a
[2];
/* Fetch the descriptor request supplied by the caller. */
204 error
= copyin(SCARG(uap
, desc
), &info
, sizeof(info
));
208 DPRINTF(("set thread area: %i, %x, %x, %i, %i, %i, %i, %i, %i\n",
209 info
.entry_number
, info
.base_addr
, info
.limit
, info
.seg_32bit
,
210 info
.contents
, info
.read_exec_only
, info
.limit_in_pages
,
211 info
.seg_not_present
, info
.useable
));
213 idx
= info
.entry_number
;
215 * Semantics of linux version: every thread in the system has array of
216 * 3 tls descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown. This
217 * syscall loads one of the selected tls decriptors with a value and
218 * also loads GDT descriptors 6, 7 and 8 with the content of the
219 * per-thread descriptors.
221 * Semantics of fbsd version: I think we can ignore that linux has 3
222 * per-thread descriptors and use just the 1st one. The tls_array[]
223 * is used only in set/get-thread_area() syscalls and for loading the
224 * GDT descriptors. In fbsd we use just one GDT descriptor for TLS so
225 * we will load just one.
227 * XXX: this doesn't work when a user space process tries to use more
228 * than 1 TLS segment. Comment in the linux sources says wine might do
233 * we support just GLIBC TLS now
234 * we should let 3 proceed as well because we use this segment so
235 * if code does two subsequent calls it should succeed
237 if (idx
!= LINUX_GLIBC_TLS_SEL
&& idx
!= -1 && idx
!= GUGS_SEL
)
241 * we have to copy out the GDT entry we use
242 * FreeBSD uses GDT entry #3 for storing %gs so load that
244 * XXX: what if a user space program doesn't check this value and tries
247 idx
= info
.entry_number
= GUGS_SEL
;
248 error
= copyout(&info
, SCARG(uap
, desc
), sizeof(info
));
252 if (LINUX_LDT_empty(&info
)) {
256 a
[0] = LINUX_LDT_entry_a(&info
);
257 a
[1] = LINUX_LDT_entry_b(&info
);
/* View the two descriptor words in a as a segment_descriptor. */
260 (void)memcpy(&sd
, &a
, sizeof(a
));
261 KASSERT(ISMEMSDP((&sd
)));
262 DPRINTF(("Segment created in set_thread_area: lobase: %x, hibase: %x, "
263 "lolimit: %x, hilimit: %x, type: %i, dpl: %i, p: %i, xx: %i, "
264 "def32: %i, gran: %i\n", sd
.sd_lobase
, sd
.sd_hibase
, sd
.sd_lolimit
,
265 sd
.sd_hilimit
, sd
.sd_type
, sd
.sd_dpl
, sd
.sd_p
, sd
.sd_xx
,
266 sd
.sd_def32
, sd
.sd_gran
));
/*
 * Install the descriptor in the pcb, in the GUGS_SEL slot of the
 * current CPU GDT, and then reload gs with the matching selector.
 */
269 (void)memcpy(&pcb
->pcb_gsd
, &sd
, sizeof(sd
));
270 (void)memcpy(&curcpu()->ci_gdt
[GUGS_SEL
], &sd
, sizeof(sd
));
271 load_gs(GSEL(GUGS_SEL
, SEL_UPL
));
/*
 * linux_sys_get_thread_area(l, uap, retval)
 *
 * Copies a struct linux_user_desc in from the user pointer
 * SCARG(uap, desc) to learn the requested entry_number, checks that it
 * is LINUX_GLIBC_TLS_SEL or GUGS_SEL, then rebuilds the structure from
 * the descriptor stored in the pcb_gsd field of the pcb of l and
 * copies the result back out to the same user pointer.
 *
 *   l      - calling lwp; its pcb supplies the stored descriptor.
 *   uap    - syscall arguments; only the desc member is read here.
 *   retval - not referenced in the visible lines.
 *
 * Returns the result of the final copyout.
 *
 * NOTE(review): this listing has lost several physical lines.  The
 * return type, the braces, the declarations of error and idx, the
 * handling of the copyin error value, the statement executed when the
 * index check fails and anything between that check and the memset
 * are not visible here; confirm them against the complete file.
 */
277 linux_sys_get_thread_area(struct lwp
*l
,
278 const struct linux_sys_get_thread_area_args
*uap
, register_t
*retval
)
280 struct pcb
*pcb
= lwp_getpcb(l
);
281 struct linux_user_desc info
;
282 struct linux_desc_struct desc
;
283 struct segment_descriptor sd
;
/* Fetch the request; only entry_number is read from it below. */
287 error
= copyin(SCARG(uap
, desc
), &info
, sizeof(info
));
291 idx
= info
.entry_number
;
292 /* XXX: I am not sure if we want 3 to be allowed too. */
293 if (idx
!= LINUX_GLIBC_TLS_SEL
&& idx
!= GUGS_SEL
)
/* Start from an all-zero reply so no copied-in field leaks back out. */
298 (void)memset(&info
, 0, sizeof(info
));
/*
 * NOTE(review): pcb->pcb_gsd is passed here without an address-of
 * operator, whereas the other functions in this file pass
 * &pcb->pcb_gsd and &pcb2->pcb_gsd to memcpy.  If pcb_gsd is a
 * struct member rather than a pointer, this should read
 * &pcb->pcb_gsd - confirm against the struct pcb definition.
 */
299 (void)memcpy(&sd
, pcb
->pcb_gsd
, sizeof(sd
));
300 (void)memcpy(&desc
, &sd
, sizeof(desc
));
/* Decode the descriptor into the Linux view with the LINUX_GET_* macros. */
302 info
.entry_number
= idx
;
303 info
.base_addr
= LINUX_GET_BASE(&desc
);
304 info
.limit
= LINUX_GET_LIMIT(&desc
);
305 info
.seg_32bit
= LINUX_GET_32BIT(&desc
);
306 info
.contents
= LINUX_GET_CONTENTS(&desc
);
307 info
.read_exec_only
= !LINUX_GET_WRITABLE(&desc
);
308 info
.limit_in_pages
= LINUX_GET_LIMIT_PAGES(&desc
);
309 info
.seg_not_present
= !LINUX_GET_PRESENT(&desc
);
310 info
.useable
= LINUX_GET_USEABLE(&desc
);
312 return copyout(&info
, SCARG(uap
, desc
), sizeof(info
));