/* create_posix_thread.cc: funcs to create posix threads or thread stacks

   This file is part of Cygwin.

   This software is a copyrighted work licensed under the terms of the
   Cygwin license.  Please consult the file "CYGWIN_LICENSE" for
   details. */

#include "winsup.h"
#include <sys/param.h>
#include "create_posix_thread.h"
#include "cygheap_malloc.h"
#include "ntdll.h"
#include "mmap_alloc.h"
/* create_posix_thread

   Replacement function for CreateThread to create pthreads.  Mainly this
   creates its own stack, either from Cygwin's thread pool, or allowing
   the caller to specify their own stack addresses, stack sizes and guard
   pages.

   create_new_main_thread_stack

   Just set up a system-like main thread stack from the pthread stack area
   maintained by the thr_alloc class.  See the description in _dll_crt0 to
   understand why we have to do this. */
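/* Illustrative usage only: a caller (such as the pthread creation code) is
   expected to use create_posix_thread roughly like this.  The identifiers
   thread_init_wrapper, pthread_object and the attr_* variables are
   placeholders, not names from this file:

     DWORD tid = 0;
     HANDLE h = create_posix_thread (thread_init_wrapper, pthread_object,
                                     attr_stackaddr, attr_stacksize,
                                     attr_guardsize, 0, &tid);

   A NULL return indicates failure; on most failure paths the Win32 error
   is available via GetLastError (). */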
struct pthread_wrapper_arg
{
  LPTHREAD_START_ROUTINE func;
  PVOID arg;
  PBYTE stackaddr;
  PBYTE stackbase;
  PBYTE stacklimit;
  ULONG guardsize;
};
static DWORD WINAPI
pthread_wrapper (PVOID arg)
{
  /* Just plain paranoia. */
  if (!arg)
    return ERROR_INVALID_PARAMETER;

  /* The process is now threaded.  Note for later usage by arc4random. */
  __isthreaded = 1;

  /* Fetch thread wrapper info and free from cygheap. */
  pthread_wrapper_arg wrapper_arg = *(pthread_wrapper_arg *) arg;
  cfree (arg);
  /* Set stack values in TEB */
  PTEB teb = NtCurrentTeb ();
  teb->Tib.StackBase = wrapper_arg.stackbase;
  teb->Tib.StackLimit = wrapper_arg.stacklimit ?: wrapper_arg.stackaddr;
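  /* Note: "a ?: b" is the GNU conditional-with-omitted-operand extension,
     equivalent to "a ? a : b".  stacklimit is only non-NULL for stacks
     created below in create_posix_thread, so for application-provided
     stacks the TEB StackLimit falls back to the application's stackaddr. */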
  /* Set DeallocationStack value.  If we have an application-provided stack,
     we set DeallocationStack to NULL, so NtTerminateThread does not deallocate
     any stack.  If we created the stack in create_posix_thread, we set
     DeallocationStack to the stackaddr of our own stack, so it's automatically
     deallocated when the thread is terminated. */
  PBYTE dealloc_addr = (PBYTE) teb->DeallocationStack;
  teb->DeallocationStack = wrapper_arg.stacklimit ? wrapper_arg.stackaddr
                                                  : NULL;
  /* Store the OS-provided DeallocationStack address in wrapper_arg.stackaddr.
     The below assembler code will release the OS stack after switching to our
     new stack. */
  wrapper_arg.stackaddr = dealloc_addr;
  /* Set thread stack guarantee matching the guardsize.
     Note that the guardsize is one page bigger than the guarantee. */
  if (wrapper_arg.guardsize > wincap.def_guard_page_size ())
    {
      wrapper_arg.guardsize -= wincap.page_size ();
      SetThreadStackGuarantee (&wrapper_arg.guardsize);
    }
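  /* SetThreadStackGuarantee takes the guaranteed size by pointer and writes
     the previous guarantee back into the same variable, which is why a
     writable local copy (wrapper_arg.guardsize) is passed here. */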
  /* Initialize new _cygtls. */
  _my_tls.init_thread (wrapper_arg.stackbase - __CYGTLS_PADSIZE__,
                       (DWORD (*)(void*, void*)) wrapper_arg.func);
#ifdef __x86_64__
  __asm__ ("\n\
           leaq  %[WRAPPER_ARG], %%rbx  # Load &wrapper_arg into rbx    \n\
           movq  (%%rbx), %%r12         # Load thread func into r12     \n\
           movq  8(%%rbx), %%r13        # Load thread arg into r13      \n\
           movq  16(%%rbx), %%rcx       # Load stackaddr into rcx       \n\
           movq  24(%%rbx), %%rsp       # Load stackbase into rsp       \n\
           subq  %[CYGTLS], %%rsp       # Subtract __CYGTLS_PADSIZE__   \n\
                                        # (here we are 16 bytes aligned)\n\
           subq  $32, %%rsp             # Subtract another 32 bytes     \n\
                                        # (shadow space for arg regs)   \n\
           xorq  %%rbp, %%rbp           # Set rbp to 0                  \n\
           # We moved to the new stack.                                 \n\
           # Now it's safe to release the OS stack.                     \n\
           movl  $0x8000, %%r8d         # dwFreeType: MEM_RELEASE       \n\
           xorl  %%edx, %%edx           # dwSize: 0                     \n\
           # dwAddress is already in the correct arg register rcx       \n\
           call  VirtualFree                                            \n\
           # All set.  We can copy the thread arg from the safe         \n\
           # register r13 and then just call the function.              \n\
           movq  %%r13, %%rcx           # Move thread arg to 1st arg reg\n\
           call  *%%r12                 # Call thread func              \n"
           : : [WRAPPER_ARG] "o" (wrapper_arg),
               [CYGTLS] "i" (__CYGTLS_PADSIZE__));
#else
#error unimplemented for this target
#endif
  /* pthread::thread_init_wrapper calls pthread::exit, which
     in turn calls ExitThread, so we should never arrive here. */
  api_fatal ("Dumb thinko in pthread handling.  Whip the developer.");
}
/* We provide the stacks always in 1 Megabyte slots */
#define THREAD_STACK_SLOT 0x100000L	/* 1 Meg */
/* Maximum stack size returned from the pool. */
#define THREAD_STACK_MAX  0x40000000L	/* 1 Gig */
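/* Illustrative example (values not from the original source): roundup2
   rounds its first argument up to the next multiple of the power-of-two
   second argument, so a 300K request maps to one slot and a 1.5 Meg
   request to two:

     roundup2 (0x4b000, THREAD_STACK_SLOT)  == 0x100000   (1 slot)
     roundup2 (0x180000, THREAD_STACK_SLOT) == 0x200000   (2 slots)  */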
class thread_allocator
{
  UINT_PTR current;
  PVOID (thread_allocator::*alloc_func) (SIZE_T);
  PVOID _alloc (SIZE_T size)
  {
    static const MEM_ADDRESS_REQUIREMENTS thread_req = {
      (PVOID) THREAD_STORAGE_LOW,		/* LowestStartingAddress */
      (PVOID) (THREAD_STORAGE_HIGH - 1),	/* HighestEndingAddress */
      THREAD_STACK_SLOT				/* Alignment */
    };
    /* g++ 11.2 workaround: don't use initializer */
    MEM_EXTENDED_PARAMETER thread_ext = { 0 };
    thread_ext.Type = MemExtendedParameterAddressRequirements;
    thread_ext.Pointer = (PVOID) &thread_req;

    SIZE_T real_size = roundup2 (size, THREAD_STACK_SLOT);
    PVOID real_stackaddr = NULL;
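    /* Note: passing a MEM_ADDRESS_REQUIREMENTS structure through a
       MemExtendedParameterAddressRequirements extended parameter asks
       VirtualAlloc2 to place the reservation inside the given address
       range with the given alignment, i.e. thread stacks land between
       THREAD_STORAGE_LOW and THREAD_STORAGE_HIGH on a 1 Meg boundary. */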
    if (real_size <= THREAD_STACK_MAX)
      real_stackaddr = VirtualAlloc2 (GetCurrentProcess(), NULL, real_size,
                                      MEM_RESERVE | MEM_TOP_DOWN,
                                      PAGE_READWRITE, &thread_ext, 1);
    /* If the thread area allocation failed, or if the application requests a
       monster stack, fulfill request from mmap area. */
    if (!real_stackaddr)
      {
        static const MEM_ADDRESS_REQUIREMENTS mmap_req = {
          (PVOID) MMAP_STORAGE_LOW,		/* LowestStartingAddress */
          (PVOID) (MMAP_STORAGE_HIGH - 1),	/* HighestEndingAddress */
          THREAD_STACK_SLOT			/* Alignment */
        };
        /* g++ 11.2 workaround: don't use initializer */
        MEM_EXTENDED_PARAMETER mmap_ext = { 0 };
        mmap_ext.Type = MemExtendedParameterAddressRequirements;
        mmap_ext.Pointer = (PVOID) &mmap_req;

        real_stackaddr = VirtualAlloc2 (GetCurrentProcess(), NULL, real_size,
                                        MEM_RESERVE | MEM_TOP_DOWN,
                                        PAGE_READWRITE, &mmap_ext, 1);
      }
    return real_stackaddr;
  }
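  /* Note: _alloc_old is the fallback used on Windows versions without the
     extended memory API (no VirtualAlloc2); the constructor below picks
     between _alloc and _alloc_old via wincap.has_extended_mem_api (). */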
  PVOID _alloc_old (SIZE_T size)
  {
    SIZE_T real_size = roundup2 (size, THREAD_STACK_SLOT);
    BOOL overflow = FALSE;
    PVOID real_stackaddr = NULL;

    /* If an application requests a monster stack, fulfill request
       from mmap area. */
    if (real_size > THREAD_STACK_MAX)
      {
        PVOID addr = mmap_alloc.alloc (NULL, real_size, false);
        return VirtualAlloc (addr, real_size, MEM_RESERVE, PAGE_READWRITE);
      }
    /* Simple round-robin.  Keep looping until VirtualAlloc succeeded, or
       until we overflowed and hit the current address. */
    for (UINT_PTR addr = current - real_size;
         !real_stackaddr && (!overflow || addr >= current);
         addr -= THREAD_STACK_SLOT)
      {
        if (addr < THREAD_STORAGE_LOW)
          {
            addr = THREAD_STORAGE_HIGH - real_size;
            overflow = TRUE;
          }
        real_stackaddr = VirtualAlloc ((PVOID) addr, real_size,
                                       MEM_RESERVE, PAGE_READWRITE);
        if (!real_stackaddr)
          {
            /* So we couldn't grab this space.  Let's check the state.
               If this area is free, simply try the next lower 1 Meg slot.
               Otherwise, shift the next try down to the AllocationBase
               of the current address, minus the requested slot size.
               Add THREAD_STACK_SLOT since that's subtracted in the next
               run of the loop anyway. */
            MEMORY_BASIC_INFORMATION mbi;
            VirtualQuery ((PVOID) addr, &mbi, sizeof mbi);
            if (mbi.State != MEM_FREE)
              addr = (UINT_PTR) mbi.AllocationBase - real_size
                     + THREAD_STACK_SLOT;
          }
      }
    /* If we got an address, remember it for the next allocation attempt. */
    if (real_stackaddr)
      current = (UINT_PTR) real_stackaddr;
    else
      set_errno (EAGAIN);
    return real_stackaddr;
  }
public:
  thread_allocator () : current (THREAD_STORAGE_HIGH)
  {
    alloc_func = wincap.has_extended_mem_api ()
                 ? &thread_allocator::_alloc
                 : &thread_allocator::_alloc_old;
  }
  PVOID alloc (SIZE_T size)
  {
    return (this->*alloc_func) (size);
  }
};

thread_allocator thr_alloc NO_COPY;
PVOID
create_new_main_thread_stack (PVOID &allocationbase)
{
  PIMAGE_DOS_HEADER dosheader;
  PIMAGE_NT_HEADERS ntheader;
  SIZE_T stacksize;
  ULONG guardsize;
  SIZE_T commitsize;
  PBYTE stacklimit;

  dosheader = (PIMAGE_DOS_HEADER) GetModuleHandle (NULL);
  ntheader = (PIMAGE_NT_HEADERS)
             ((PBYTE) dosheader + dosheader->e_lfanew);
  stacksize = ntheader->OptionalHeader.SizeOfStackReserve;
  stacksize = roundup2 (stacksize, wincap.allocation_granularity ());

  allocationbase
        = thr_alloc.alloc (ntheader->OptionalHeader.SizeOfStackReserve);
  guardsize = wincap.def_guard_page_size ();
  commitsize = ntheader->OptionalHeader.SizeOfStackCommit;
  commitsize = roundup2 (commitsize, wincap.page_size ());
  if (commitsize > stacksize - guardsize - wincap.page_size ())
    commitsize = stacksize - guardsize - wincap.page_size ();
  stacklimit = (PBYTE) allocationbase + stacksize - commitsize - guardsize;
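  /* For reference, the resulting layout of the new main thread stack, as
     derived from the code above and below (low addresses first):

        allocationbase
           reserved, still uncommitted pages
        guard page(s)      (guardsize bytes, PAGE_READWRITE | PAGE_GUARD)
        committed region   (commitsize bytes, PAGE_READWRITE)
        allocationbase + stacksize  == TEB StackBase

     The function returns an initial stack pointer 16 bytes below StackBase;
     the TEB StackLimit points at the start of the committed region. */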
  /* Setup guardpage. */
  if (!VirtualAlloc (stacklimit, guardsize,
                     MEM_COMMIT, PAGE_READWRITE | PAGE_GUARD))
    return NULL;
  /* Setup committed region. */
  stacklimit += guardsize;
  if (!VirtualAlloc (stacklimit, commitsize, MEM_COMMIT, PAGE_READWRITE))
    return NULL;
  NtCurrentTeb()->Tib.StackBase = ((PBYTE) allocationbase + stacksize);
  NtCurrentTeb()->Tib.StackLimit = stacklimit;
  return ((PBYTE) allocationbase + stacksize - 16);
}
HANDLE
create_posix_thread (LPTHREAD_START_ROUTINE thread_func, PVOID thread_arg,
                     PVOID stackaddr, ULONG stacksize, ULONG guardsize,
                     DWORD creation_flags, LPDWORD thread_id)
{
  PVOID real_stackaddr = NULL;
  ULONG real_stacksize = 0;
  ULONG real_guardsize = 0;
  pthread_wrapper_arg *wrapper_arg;
  HANDLE thread = NULL;

  wrapper_arg = (pthread_wrapper_arg *) ccalloc (HEAP_STR, 1,
                                                 sizeof *wrapper_arg);
  if (!wrapper_arg)
    {
      SetLastError (ERROR_OUTOFMEMORY);
      return NULL;
    }
  wrapper_arg->func = thread_func;
  wrapper_arg->arg = thread_arg;
  if (stackaddr)
    {
      /* If the application provided the stack, just use it.  There won't
         be any stack overflow handling! */
      wrapper_arg->stackaddr = (PBYTE) stackaddr;
      wrapper_arg->stackbase = (PBYTE) stackaddr + stacksize;
    }
  else
    {
      PBYTE real_stacklimit;

      /* If not, we have to create the stack here. */
      real_stacksize = roundup2 (stacksize, wincap.page_size ());
      real_guardsize = roundup2 (guardsize, wincap.page_size ());
      /* Add the guardsize to the stacksize */
      real_stacksize += real_guardsize;
      /* Take dead zone page into account, which always stays uncommitted. */
      real_stacksize += wincap.page_size ();
      /* Now round up the result to the next allocation boundary. */
      real_stacksize = roundup2 (real_stacksize,
                                 wincap.allocation_granularity ());
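      /* Illustrative example (values not from the original source): for a
         2 Meg request with a 4K guard page on a 4K-page system,
            real_stacksize = 0x200000 + 0x1000 (guard) + 0x1000 (dead zone)
                           = 0x202000,
         which rounds up to 0x210000 with a 64K allocation granularity. */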
      real_stackaddr = thr_alloc.alloc (real_stacksize);
      if (!real_stackaddr)
        return NULL;
      /* Set up committed region.  We set up the stack like the OS does,
         with a reserved region, the guard pages, and a committed region.
         We commit the stack commit size from the executable header, but
         at least PTHREAD_STACK_MIN (64K). */
      static ULONG exe_commitsize;
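      /* Note: exe_commitsize is static so the executable's PE header is
         parsed only once; later thread creations reuse the cached
         SizeOfStackCommit value. */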
      if (!exe_commitsize)
        {
          PIMAGE_DOS_HEADER dosheader;
          PIMAGE_NT_HEADERS ntheader;

          dosheader = (PIMAGE_DOS_HEADER) GetModuleHandle (NULL);
          ntheader = (PIMAGE_NT_HEADERS)
                     ((PBYTE) dosheader + dosheader->e_lfanew);
          exe_commitsize = ntheader->OptionalHeader.SizeOfStackCommit;
          exe_commitsize = roundup2 (exe_commitsize, wincap.page_size ());
        }
      ULONG commitsize = exe_commitsize;
      if (commitsize > real_stacksize - real_guardsize - wincap.page_size ())
        commitsize = real_stacksize - real_guardsize - wincap.page_size ();
      else if (commitsize < PTHREAD_STACK_MIN)
        commitsize = PTHREAD_STACK_MIN;
      real_stacklimit = (PBYTE) real_stackaddr + real_stacksize
                        - commitsize - real_guardsize;
      if (!VirtualAlloc (real_stacklimit, real_guardsize, MEM_COMMIT,
                         PAGE_READWRITE | PAGE_GUARD))
        goto err;
      real_stacklimit += real_guardsize;
      if (!VirtualAlloc (real_stacklimit, commitsize, MEM_COMMIT,
                         PAGE_READWRITE))
        goto err;

      wrapper_arg->stackaddr = (PBYTE) real_stackaddr;
      wrapper_arg->stackbase = (PBYTE) real_stackaddr + real_stacksize;
      wrapper_arg->stacklimit = real_stacklimit;
      wrapper_arg->guardsize = real_guardsize;
    }
  /* Use the STACK_SIZE_PARAM_IS_A_RESERVATION parameter so only the
     minimum size for a thread stack is reserved by the OS.  Note that we
     reserve a 256K stack, not 64K, otherwise the thread creation might
     crash the process due to a stack overflow. */
  thread = CreateThread (&sec_none_nih, 4 * PTHREAD_STACK_MIN,
                         pthread_wrapper, wrapper_arg,
                         creation_flags | STACK_SIZE_PARAM_IS_A_RESERVATION,
                         thread_id);

err:
  if (!thread && real_stackaddr)
    {
      /* Don't report the wrong error even though VirtualFree is very unlikely
         to fail. */
      DWORD err = GetLastError ();
      VirtualFree (real_stackaddr, 0, MEM_RELEASE);
      SetLastError (err);
    }
  return thread;
}